refactor(assets): merge AssetInfo and AssetCacheState into AssetReference

This change fixes the basename collision bug by enforcing a UNIQUE(file_path)
constraint on the unified asset_references table. Key changes:

Database:
- Migration 0005 merges asset_cache_states and asset_infos into asset_references
- AssetReference now contains: cache state fields (file_path, mtime_ns, needs_verify,
  is_missing, enrichment_level) plus info fields (name, owner_id, preview_id, etc.)
- AssetReferenceMeta replaces AssetInfoMeta
- AssetReferenceTag replaces AssetInfoTag
- UNIQUE constraint on file_path prevents duplicate entries for the same file

Code:
- New unified query module: asset_reference.py (replaces asset_info.py, cache_state.py)
- Updated scanner, seeder, and services to use AssetReference
- Updated API routes to use reference_id instead of asset_info_id

Tests:
- All 175 unit tests are updated and passing
- Integration tests require a server environment (not run here)

Amp-Thread-ID: https://ampcode.com/threads/T-019c4fe8-9dcb-75ce-bea8-ea786343a581
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Luke Mino-Altherr
2026-02-11 20:03:10 -08:00
parent 5ff6bf7a83
commit 8ff4d38ad1
36 changed files with 3191 additions and 2327 deletions

View File

@@ -3,24 +3,24 @@ import uuid
import pytest
from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetInfo, AssetInfoMeta
from app.assets.database.models import Asset, AssetReference, AssetReferenceMeta
from app.assets.database.queries import (
asset_info_exists_for_asset_id,
get_asset_info_by_id,
insert_asset_info,
get_or_create_asset_info,
update_asset_info_timestamps,
list_asset_infos_page,
fetch_asset_info_asset_and_tags,
fetch_asset_info_and_asset,
update_asset_info_access_time,
set_asset_info_metadata,
delete_asset_info_by_id,
set_asset_info_preview,
bulk_insert_asset_infos_ignore_conflicts,
get_asset_info_ids_by_ids,
reference_exists_for_asset_id,
get_reference_by_id,
insert_reference,
get_or_create_reference,
update_reference_timestamps,
list_references_page,
fetch_reference_asset_and_tags,
fetch_reference_and_asset,
update_reference_access_time,
set_reference_metadata,
delete_reference_by_id,
set_reference_preview,
bulk_insert_references_ignore_conflicts,
get_reference_ids_by_ids,
ensure_tags_exist,
add_tags_to_asset_info,
add_tags_to_reference,
)
from app.assets.helpers import get_utc_now
@@ -32,14 +32,14 @@ def _make_asset(session: Session, hash_val: str | None = None, size: int = 1024)
return asset
def _make_asset_info(
def _make_reference(
session: Session,
asset: Asset,
name: str = "test",
owner_id: str = "",
) -> AssetInfo:
) -> AssetReference:
now = get_utc_now()
info = AssetInfo(
ref = AssetReference(
owner_id=owner_id,
name=name,
asset_id=asset.id,
@@ -47,381 +47,386 @@ def _make_asset_info(
updated_at=now,
last_access_time=now,
)
session.add(info)
session.add(ref)
session.flush()
return info
return ref
class TestAssetInfoExistsForAssetId:
def test_returns_false_when_no_info(self, session: Session):
class TestReferenceExistsForAssetId:
def test_returns_false_when_no_reference(self, session: Session):
asset = _make_asset(session, "hash1")
assert asset_info_exists_for_asset_id(session, asset_id=asset.id) is False
assert reference_exists_for_asset_id(session, asset_id=asset.id) is False
def test_returns_true_when_info_exists(self, session: Session):
def test_returns_true_when_reference_exists(self, session: Session):
asset = _make_asset(session, "hash1")
_make_asset_info(session, asset)
assert asset_info_exists_for_asset_id(session, asset_id=asset.id) is True
_make_reference(session, asset)
assert reference_exists_for_asset_id(session, asset_id=asset.id) is True
class TestGetAssetInfoById:
class TestGetReferenceById:
def test_returns_none_for_nonexistent(self, session: Session):
assert get_asset_info_by_id(session, asset_info_id="nonexistent") is None
assert get_reference_by_id(session, reference_id="nonexistent") is None
def test_returns_info(self, session: Session):
def test_returns_reference(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset, name="myfile.txt")
ref = _make_reference(session, asset, name="myfile.txt")
result = get_asset_info_by_id(session, asset_info_id=info.id)
result = get_reference_by_id(session, reference_id=ref.id)
assert result is not None
assert result.name == "myfile.txt"
class TestListAssetInfosPage:
class TestListReferencesPage:
def test_empty_db(self, session: Session):
infos, tag_map, total = list_asset_infos_page(session)
assert infos == []
refs, tag_map, total = list_references_page(session)
assert refs == []
assert tag_map == {}
assert total == 0
def test_returns_infos_with_tags(self, session: Session):
def test_returns_references_with_tags(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset, name="test.bin")
ref = _make_reference(session, asset, name="test.bin")
ensure_tags_exist(session, ["alpha", "beta"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["alpha", "beta"])
add_tags_to_reference(session, reference_id=ref.id, tags=["alpha", "beta"])
session.commit()
infos, tag_map, total = list_asset_infos_page(session)
assert len(infos) == 1
assert infos[0].id == info.id
assert set(tag_map[info.id]) == {"alpha", "beta"}
refs, tag_map, total = list_references_page(session)
assert len(refs) == 1
assert refs[0].id == ref.id
assert set(tag_map[ref.id]) == {"alpha", "beta"}
assert total == 1
def test_name_contains_filter(self, session: Session):
asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, name="model_v1.safetensors")
_make_asset_info(session, asset, name="config.json")
_make_reference(session, asset, name="model_v1.safetensors")
_make_reference(session, asset, name="config.json")
session.commit()
infos, _, total = list_asset_infos_page(session, name_contains="model")
refs, _, total = list_references_page(session, name_contains="model")
assert total == 1
assert infos[0].name == "model_v1.safetensors"
assert refs[0].name == "model_v1.safetensors"
def test_owner_visibility(self, session: Session):
asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, name="public", owner_id="")
_make_asset_info(session, asset, name="private", owner_id="user1")
_make_reference(session, asset, name="public", owner_id="")
_make_reference(session, asset, name="private", owner_id="user1")
session.commit()
# Empty owner sees only public
infos, _, total = list_asset_infos_page(session, owner_id="")
refs, _, total = list_references_page(session, owner_id="")
assert total == 1
assert infos[0].name == "public"
assert refs[0].name == "public"
# Owner sees both
infos, _, total = list_asset_infos_page(session, owner_id="user1")
refs, _, total = list_references_page(session, owner_id="user1")
assert total == 2
def test_include_tags_filter(self, session: Session):
asset = _make_asset(session, "hash1")
info1 = _make_asset_info(session, asset, name="tagged")
_make_asset_info(session, asset, name="untagged")
ref1 = _make_reference(session, asset, name="tagged")
_make_reference(session, asset, name="untagged")
ensure_tags_exist(session, ["wanted"])
add_tags_to_asset_info(session, asset_info_id=info1.id, tags=["wanted"])
add_tags_to_reference(session, reference_id=ref1.id, tags=["wanted"])
session.commit()
infos, _, total = list_asset_infos_page(session, include_tags=["wanted"])
refs, _, total = list_references_page(session, include_tags=["wanted"])
assert total == 1
assert infos[0].name == "tagged"
assert refs[0].name == "tagged"
def test_exclude_tags_filter(self, session: Session):
asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, name="keep")
info_exclude = _make_asset_info(session, asset, name="exclude")
_make_reference(session, asset, name="keep")
ref_exclude = _make_reference(session, asset, name="exclude")
ensure_tags_exist(session, ["bad"])
add_tags_to_asset_info(session, asset_info_id=info_exclude.id, tags=["bad"])
add_tags_to_reference(session, reference_id=ref_exclude.id, tags=["bad"])
session.commit()
infos, _, total = list_asset_infos_page(session, exclude_tags=["bad"])
refs, _, total = list_references_page(session, exclude_tags=["bad"])
assert total == 1
assert infos[0].name == "keep"
assert refs[0].name == "keep"
def test_sorting(self, session: Session):
asset = _make_asset(session, "hash1", size=100)
asset2 = _make_asset(session, "hash2", size=500)
_make_asset_info(session, asset, name="small")
_make_asset_info(session, asset2, name="large")
_make_reference(session, asset, name="small")
_make_reference(session, asset2, name="large")
session.commit()
infos, _, _ = list_asset_infos_page(session, sort="size", order="desc")
assert infos[0].name == "large"
refs, _, _ = list_references_page(session, sort="size", order="desc")
assert refs[0].name == "large"
infos, _, _ = list_asset_infos_page(session, sort="name", order="asc")
assert infos[0].name == "large"
refs, _, _ = list_references_page(session, sort="name", order="asc")
assert refs[0].name == "large"
class TestFetchAssetInfoAssetAndTags:
class TestFetchReferenceAssetAndTags:
def test_returns_none_for_nonexistent(self, session: Session):
result = fetch_asset_info_asset_and_tags(session, "nonexistent")
result = fetch_reference_asset_and_tags(session, "nonexistent")
assert result is None
def test_returns_tuple(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset, name="test.bin")
ref = _make_reference(session, asset, name="test.bin")
ensure_tags_exist(session, ["tag1"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["tag1"])
add_tags_to_reference(session, reference_id=ref.id, tags=["tag1"])
session.commit()
result = fetch_asset_info_asset_and_tags(session, info.id)
result = fetch_reference_asset_and_tags(session, ref.id)
assert result is not None
ret_info, ret_asset, ret_tags = result
assert ret_info.id == info.id
ret_ref, ret_asset, ret_tags = result
assert ret_ref.id == ref.id
assert ret_asset.id == asset.id
assert ret_tags == ["tag1"]
class TestFetchAssetInfoAndAsset:
class TestFetchReferenceAndAsset:
def test_returns_none_for_nonexistent(self, session: Session):
result = fetch_asset_info_and_asset(session, asset_info_id="nonexistent")
result = fetch_reference_and_asset(session, reference_id="nonexistent")
assert result is None
def test_returns_tuple(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
session.commit()
result = fetch_asset_info_and_asset(session, asset_info_id=info.id)
result = fetch_reference_and_asset(session, reference_id=ref.id)
assert result is not None
ret_info, ret_asset = result
assert ret_info.id == info.id
ret_ref, ret_asset = result
assert ret_ref.id == ref.id
assert ret_asset.id == asset.id
class TestUpdateAssetInfoAccessTime:
class TestUpdateReferenceAccessTime:
def test_updates_last_access_time(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
original_time = info.last_access_time
ref = _make_reference(session, asset)
original_time = ref.last_access_time
session.commit()
import time
time.sleep(0.01)
update_asset_info_access_time(session, asset_info_id=info.id)
update_reference_access_time(session, reference_id=ref.id)
session.commit()
session.refresh(info)
assert info.last_access_time > original_time
session.refresh(ref)
assert ref.last_access_time > original_time
class TestDeleteAssetInfoById:
class TestDeleteReferenceById:
def test_deletes_existing(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
session.commit()
result = delete_asset_info_by_id(session, asset_info_id=info.id, owner_id="")
result = delete_reference_by_id(session, reference_id=ref.id, owner_id="")
assert result is True
assert get_asset_info_by_id(session, asset_info_id=info.id) is None
assert get_reference_by_id(session, reference_id=ref.id) is None
def test_returns_false_for_nonexistent(self, session: Session):
result = delete_asset_info_by_id(session, asset_info_id="nonexistent", owner_id="")
result = delete_reference_by_id(session, reference_id="nonexistent", owner_id="")
assert result is False
def test_respects_owner_visibility(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset, owner_id="user1")
ref = _make_reference(session, asset, owner_id="user1")
session.commit()
result = delete_asset_info_by_id(session, asset_info_id=info.id, owner_id="user2")
result = delete_reference_by_id(session, reference_id=ref.id, owner_id="user2")
assert result is False
assert get_asset_info_by_id(session, asset_info_id=info.id) is not None
assert get_reference_by_id(session, reference_id=ref.id) is not None
class TestSetAssetInfoPreview:
class TestSetReferencePreview:
def test_sets_preview(self, session: Session):
asset = _make_asset(session, "hash1")
preview_asset = _make_asset(session, "preview_hash")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
session.commit()
set_asset_info_preview(session, asset_info_id=info.id, preview_asset_id=preview_asset.id)
set_reference_preview(session, reference_id=ref.id, preview_asset_id=preview_asset.id)
session.commit()
session.refresh(info)
assert info.preview_id == preview_asset.id
session.refresh(ref)
assert ref.preview_id == preview_asset.id
def test_clears_preview(self, session: Session):
asset = _make_asset(session, "hash1")
preview_asset = _make_asset(session, "preview_hash")
info = _make_asset_info(session, asset)
info.preview_id = preview_asset.id
ref = _make_reference(session, asset)
ref.preview_id = preview_asset.id
session.commit()
set_asset_info_preview(session, asset_info_id=info.id, preview_asset_id=None)
set_reference_preview(session, reference_id=ref.id, preview_asset_id=None)
session.commit()
session.refresh(info)
assert info.preview_id is None
session.refresh(ref)
assert ref.preview_id is None
def test_raises_for_nonexistent_info(self, session: Session):
def test_raises_for_nonexistent_reference(self, session: Session):
with pytest.raises(ValueError, match="not found"):
set_asset_info_preview(session, asset_info_id="nonexistent", preview_asset_id=None)
set_reference_preview(session, reference_id="nonexistent", preview_asset_id=None)
def test_raises_for_nonexistent_preview(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
session.commit()
with pytest.raises(ValueError, match="Preview Asset"):
set_asset_info_preview(session, asset_info_id=info.id, preview_asset_id="nonexistent")
set_reference_preview(session, reference_id=ref.id, preview_asset_id="nonexistent")
class TestInsertAssetInfo:
def test_creates_new_info(self, session: Session):
class TestInsertReference:
def test_creates_new_reference(self, session: Session):
asset = _make_asset(session, "hash1")
info = insert_asset_info(
ref = insert_reference(
session, asset_id=asset.id, owner_id="user1", name="test.bin"
)
session.commit()
assert info is not None
assert info.name == "test.bin"
assert info.owner_id == "user1"
assert ref is not None
assert ref.name == "test.bin"
assert ref.owner_id == "user1"
def test_returns_none_on_conflict(self, session: Session):
def test_allows_duplicate_names(self, session: Session):
asset = _make_asset(session, "hash1")
insert_asset_info(session, asset_id=asset.id, owner_id="user1", name="dup.bin")
ref1 = insert_reference(session, asset_id=asset.id, owner_id="user1", name="dup.bin")
session.commit()
# Attempt duplicate with same (asset_id, owner_id, name)
result = insert_asset_info(
# Duplicate names are now allowed
ref2 = insert_reference(
session, asset_id=asset.id, owner_id="user1", name="dup.bin"
)
assert result is None
session.commit()
assert ref1 is not None
assert ref2 is not None
assert ref1.id != ref2.id
class TestGetOrCreateAssetInfo:
def test_creates_new_info(self, session: Session):
class TestGetOrCreateReference:
def test_creates_new_reference(self, session: Session):
asset = _make_asset(session, "hash1")
info, created = get_or_create_asset_info(
ref, created = get_or_create_reference(
session, asset_id=asset.id, owner_id="user1", name="new.bin"
)
session.commit()
assert created is True
assert info.name == "new.bin"
assert ref.name == "new.bin"
def test_returns_existing_info(self, session: Session):
def test_always_creates_new_reference(self, session: Session):
asset = _make_asset(session, "hash1")
info1, created1 = get_or_create_asset_info(
ref1, created1 = get_or_create_reference(
session, asset_id=asset.id, owner_id="user1", name="existing.bin"
)
session.commit()
info2, created2 = get_or_create_asset_info(
# Duplicate names are allowed, so always creates new
ref2, created2 = get_or_create_reference(
session, asset_id=asset.id, owner_id="user1", name="existing.bin"
)
session.commit()
assert created1 is True
assert created2 is False
assert info1.id == info2.id
assert created2 is True
assert ref1.id != ref2.id
class TestUpdateAssetInfoTimestamps:
class TestUpdateReferenceTimestamps:
def test_updates_timestamps(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
original_updated_at = info.updated_at
ref = _make_reference(session, asset)
original_updated_at = ref.updated_at
session.commit()
time.sleep(0.01)
update_asset_info_timestamps(session, info)
update_reference_timestamps(session, ref)
session.commit()
session.refresh(info)
assert info.updated_at > original_updated_at
session.refresh(ref)
assert ref.updated_at > original_updated_at
def test_updates_preview_id(self, session: Session):
asset = _make_asset(session, "hash1")
preview_asset = _make_asset(session, "preview_hash")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
session.commit()
update_asset_info_timestamps(session, info, preview_id=preview_asset.id)
update_reference_timestamps(session, ref, preview_id=preview_asset.id)
session.commit()
session.refresh(info)
assert info.preview_id == preview_asset.id
session.refresh(ref)
assert ref.preview_id == preview_asset.id
class TestSetAssetInfoMetadata:
class TestSetReferenceMetadata:
def test_sets_metadata(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
session.commit()
set_asset_info_metadata(
session, asset_info_id=info.id, user_metadata={"key": "value"}
set_reference_metadata(
session, reference_id=ref.id, user_metadata={"key": "value"}
)
session.commit()
session.refresh(info)
assert info.user_metadata == {"key": "value"}
session.refresh(ref)
assert ref.user_metadata == {"key": "value"}
# Check metadata table
meta = session.query(AssetInfoMeta).filter_by(asset_info_id=info.id).all()
meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
assert len(meta) == 1
assert meta[0].key == "key"
assert meta[0].val_str == "value"
def test_replaces_existing_metadata(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
session.commit()
set_asset_info_metadata(
session, asset_info_id=info.id, user_metadata={"old": "data"}
set_reference_metadata(
session, reference_id=ref.id, user_metadata={"old": "data"}
)
session.commit()
set_asset_info_metadata(
session, asset_info_id=info.id, user_metadata={"new": "data"}
set_reference_metadata(
session, reference_id=ref.id, user_metadata={"new": "data"}
)
session.commit()
meta = session.query(AssetInfoMeta).filter_by(asset_info_id=info.id).all()
meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
assert len(meta) == 1
assert meta[0].key == "new"
def test_clears_metadata_with_empty_dict(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
session.commit()
set_asset_info_metadata(
session, asset_info_id=info.id, user_metadata={"key": "value"}
set_reference_metadata(
session, reference_id=ref.id, user_metadata={"key": "value"}
)
session.commit()
set_asset_info_metadata(
session, asset_info_id=info.id, user_metadata={}
set_reference_metadata(
session, reference_id=ref.id, user_metadata={}
)
session.commit()
session.refresh(info)
assert info.user_metadata == {}
meta = session.query(AssetInfoMeta).filter_by(asset_info_id=info.id).all()
session.refresh(ref)
assert ref.user_metadata == {}
meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
assert len(meta) == 0
def test_raises_for_nonexistent(self, session: Session):
with pytest.raises(ValueError, match="not found"):
set_asset_info_metadata(
session, asset_info_id="nonexistent", user_metadata={"key": "value"}
set_reference_metadata(
session, reference_id="nonexistent", user_metadata={"key": "value"}
)
class TestBulkInsertAssetInfosIgnoreConflicts:
def test_inserts_multiple_infos(self, session: Session):
class TestBulkInsertReferencesIgnoreConflicts:
def test_inserts_multiple_references(self, session: Session):
asset = _make_asset(session, "hash1")
now = get_utc_now()
rows = [
@@ -448,15 +453,15 @@ class TestBulkInsertAssetInfosIgnoreConflicts:
"last_access_time": now,
},
]
bulk_insert_asset_infos_ignore_conflicts(session, rows)
bulk_insert_references_ignore_conflicts(session, rows)
session.commit()
infos = session.query(AssetInfo).all()
assert len(infos) == 2
refs = session.query(AssetReference).all()
assert len(refs) == 2
def test_ignores_conflicts(self, session: Session):
def test_allows_duplicate_names(self, session: Session):
asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, name="existing.bin", owner_id="")
_make_reference(session, asset, name="existing.bin", owner_id="")
session.commit()
now = get_utc_now()
@@ -484,28 +489,29 @@ class TestBulkInsertAssetInfosIgnoreConflicts:
"last_access_time": now,
},
]
bulk_insert_asset_infos_ignore_conflicts(session, rows)
bulk_insert_references_ignore_conflicts(session, rows)
session.commit()
infos = session.query(AssetInfo).all()
assert len(infos) == 2 # existing + new, not 3
# Duplicate names allowed, so all 3 rows exist
refs = session.query(AssetReference).all()
assert len(refs) == 3
def test_empty_list_is_noop(self, session: Session):
bulk_insert_asset_infos_ignore_conflicts(session, [])
assert session.query(AssetInfo).count() == 0
bulk_insert_references_ignore_conflicts(session, [])
assert session.query(AssetReference).count() == 0
class TestGetAssetInfoIdsByIds:
class TestGetReferenceIdsByIds:
def test_returns_existing_ids(self, session: Session):
asset = _make_asset(session, "hash1")
info1 = _make_asset_info(session, asset, name="a.bin")
info2 = _make_asset_info(session, asset, name="b.bin")
ref1 = _make_reference(session, asset, name="a.bin")
ref2 = _make_reference(session, asset, name="b.bin")
session.commit()
found = get_asset_info_ids_by_ids(session, [info1.id, info2.id, "nonexistent"])
found = get_reference_ids_by_ids(session, [ref1.id, ref2.id, "nonexistent"])
assert found == {info1.id, info2.id}
assert found == {ref1.id, ref2.id}
def test_empty_list_returns_empty(self, session: Session):
found = get_asset_info_ids_by_ids(session, [])
found = get_reference_ids_by_ids(session, [])
assert found == set()

View File

@@ -1,21 +1,21 @@
"""Tests for cache_state query functions."""
"""Tests for cache_state (AssetReference file path) query functions."""
import pytest
from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetCacheState, AssetInfo
from app.assets.database.models import Asset, AssetReference
from app.assets.database.queries import (
list_cache_states_by_asset_id,
upsert_cache_state,
list_references_by_asset_id,
upsert_reference,
get_unreferenced_unhashed_asset_ids,
delete_assets_by_ids,
get_cache_states_for_prefixes,
get_references_for_prefixes,
bulk_update_needs_verify,
delete_cache_states_by_ids,
delete_references_by_ids,
delete_orphaned_seed_asset,
bulk_insert_cache_states_ignore_conflicts,
get_cache_states_by_paths_and_asset_ids,
mark_cache_states_missing_outside_prefixes,
restore_cache_states_by_paths,
bulk_insert_references_ignore_conflicts,
get_references_by_paths_and_asset_ids,
mark_references_missing_outside_prefixes,
restore_references_by_paths,
)
from app.assets.helpers import select_best_live_path, get_utc_now
@@ -27,49 +27,55 @@ def _make_asset(session: Session, hash_val: str | None = None, size: int = 1024)
return asset
def _make_cache_state(
def _make_reference(
session: Session,
asset: Asset,
file_path: str,
name: str = "test",
mtime_ns: int | None = None,
needs_verify: bool = False,
) -> AssetCacheState:
state = AssetCacheState(
) -> AssetReference:
now = get_utc_now()
ref = AssetReference(
asset_id=asset.id,
file_path=file_path,
name=name,
mtime_ns=mtime_ns,
needs_verify=needs_verify,
created_at=now,
updated_at=now,
last_access_time=now,
)
session.add(state)
session.add(ref)
session.flush()
return state
return ref
class TestListCacheStatesByAssetId:
def test_returns_empty_for_no_states(self, session: Session):
class TestListReferencesByAssetId:
def test_returns_empty_for_no_references(self, session: Session):
asset = _make_asset(session, "hash1")
states = list_cache_states_by_asset_id(session, asset_id=asset.id)
assert list(states) == []
refs = list_references_by_asset_id(session, asset_id=asset.id)
assert list(refs) == []
def test_returns_states_for_asset(self, session: Session):
def test_returns_references_for_asset(self, session: Session):
asset = _make_asset(session, "hash1")
_make_cache_state(session, asset, "/path/a.bin")
_make_cache_state(session, asset, "/path/b.bin")
_make_reference(session, asset, "/path/a.bin", name="a")
_make_reference(session, asset, "/path/b.bin", name="b")
session.commit()
states = list_cache_states_by_asset_id(session, asset_id=asset.id)
paths = [s.file_path for s in states]
refs = list_references_by_asset_id(session, asset_id=asset.id)
paths = [r.file_path for r in refs]
assert set(paths) == {"/path/a.bin", "/path/b.bin"}
def test_does_not_return_other_assets_states(self, session: Session):
def test_does_not_return_other_assets_references(self, session: Session):
asset1 = _make_asset(session, "hash1")
asset2 = _make_asset(session, "hash2")
_make_cache_state(session, asset1, "/path/asset1.bin")
_make_cache_state(session, asset2, "/path/asset2.bin")
_make_reference(session, asset1, "/path/asset1.bin", name="a1")
_make_reference(session, asset2, "/path/asset2.bin", name="a2")
session.commit()
states = list_cache_states_by_asset_id(session, asset_id=asset1.id)
paths = [s.file_path for s in states]
refs = list_references_by_asset_id(session, asset_id=asset1.id)
paths = [r.file_path for r in refs]
assert paths == ["/path/asset1.bin"]
@@ -80,10 +86,10 @@ class TestSelectBestLivePath:
def test_returns_empty_when_no_files_exist(self, session: Session):
asset = _make_asset(session, "hash1")
state = _make_cache_state(session, asset, "/nonexistent/path.bin")
ref = _make_reference(session, asset, "/nonexistent/path.bin")
session.commit()
result = select_best_live_path([state])
result = select_best_live_path([ref])
assert result == ""
def test_prefers_verified_path(self, session: Session, tmp_path):
@@ -96,124 +102,125 @@ class TestSelectBestLivePath:
unverified_file = tmp_path / "unverified.bin"
unverified_file.write_bytes(b"data")
state_verified = _make_cache_state(
session, asset, str(verified_file), needs_verify=False
ref_verified = _make_reference(
session, asset, str(verified_file), name="verified", needs_verify=False
)
state_unverified = _make_cache_state(
session, asset, str(unverified_file), needs_verify=True
ref_unverified = _make_reference(
session, asset, str(unverified_file), name="unverified", needs_verify=True
)
session.commit()
states = [state_unverified, state_verified]
result = select_best_live_path(states)
refs = [ref_unverified, ref_verified]
result = select_best_live_path(refs)
assert result == str(verified_file)
def test_falls_back_to_existing_unverified(self, session: Session, tmp_path):
"""If all states need verification, return first existing path."""
"""If all references need verification, return first existing path."""
asset = _make_asset(session, "hash1")
existing_file = tmp_path / "exists.bin"
existing_file.write_bytes(b"data")
state = _make_cache_state(session, asset, str(existing_file), needs_verify=True)
ref = _make_reference(session, asset, str(existing_file), needs_verify=True)
session.commit()
result = select_best_live_path([state])
result = select_best_live_path([ref])
assert result == str(existing_file)
class TestSelectBestLivePathWithMocking:
def test_handles_missing_file_path_attr(self):
"""Gracefully handle states with None file_path."""
"""Gracefully handle references with None file_path."""
class MockState:
class MockRef:
file_path = None
needs_verify = False
result = select_best_live_path([MockState()])
result = select_best_live_path([MockRef()])
assert result == ""
class TestUpsertCacheState:
class TestUpsertReference:
@pytest.mark.parametrize(
"initial_mtime,second_mtime,expect_created,expect_updated,final_mtime",
[
# New state creation
# New reference creation
(None, 12345, True, False, 12345),
# Existing state, same mtime - no update
# Existing reference, same mtime - no update
(100, 100, False, False, 100),
# Existing state, different mtime - update
# Existing reference, different mtime - update
(100, 200, False, True, 200),
],
ids=["new_state", "existing_no_change", "existing_update_mtime"],
ids=["new_reference", "existing_no_change", "existing_update_mtime"],
)
def test_upsert_scenarios(
self, session: Session, initial_mtime, second_mtime, expect_created, expect_updated, final_mtime
):
asset = _make_asset(session, "hash1")
file_path = f"/path_{initial_mtime}_{second_mtime}.bin"
name = f"file_{initial_mtime}_{second_mtime}"
# Create initial state if needed
# Create initial reference if needed
if initial_mtime is not None:
upsert_cache_state(session, asset_id=asset.id, file_path=file_path, mtime_ns=initial_mtime)
upsert_reference(session, asset_id=asset.id, file_path=file_path, name=name, mtime_ns=initial_mtime)
session.commit()
# The upsert call we're testing
created, updated = upsert_cache_state(
session, asset_id=asset.id, file_path=file_path, mtime_ns=second_mtime
created, updated = upsert_reference(
session, asset_id=asset.id, file_path=file_path, name=name, mtime_ns=second_mtime
)
session.commit()
assert created is expect_created
assert updated is expect_updated
state = session.query(AssetCacheState).filter_by(file_path=file_path).one()
assert state.mtime_ns == final_mtime
ref = session.query(AssetReference).filter_by(file_path=file_path).one()
assert ref.mtime_ns == final_mtime
def test_upsert_restores_missing_state(self, session: Session):
"""Upserting a cache state that was marked missing should restore it."""
def test_upsert_restores_missing_reference(self, session: Session):
"""Upserting a reference that was marked missing should restore it."""
asset = _make_asset(session, "hash1")
file_path = "/restored/file.bin"
state = _make_cache_state(session, asset, file_path, mtime_ns=100)
state.is_missing = True
ref = _make_reference(session, asset, file_path, mtime_ns=100)
ref.is_missing = True
session.commit()
created, updated = upsert_cache_state(
session, asset_id=asset.id, file_path=file_path, mtime_ns=100
created, updated = upsert_reference(
session, asset_id=asset.id, file_path=file_path, name="restored", mtime_ns=100
)
session.commit()
assert created is False
assert updated is True
restored_state = session.query(AssetCacheState).filter_by(file_path=file_path).one()
assert restored_state.is_missing is False
restored_ref = session.query(AssetReference).filter_by(file_path=file_path).one()
assert restored_ref.is_missing is False
# Tests for restore_references_by_paths (pre-change name: restore_cache_states_by_paths).
# NOTE(review): rendered diff - removed cache-state lines are followed by their
# AssetReference replacements.
class TestRestoreCacheStatesByPaths:
def test_restores_missing_states(self, session: Session):
class TestRestoreReferencesByPaths:
def test_restores_missing_references(self, session: Session):
asset = _make_asset(session, "hash1")
missing_path = "/missing/file.bin"
active_path = "/active/file.bin"
# Two rows on the same asset: one flagged missing, one left as created.
missing_state = _make_cache_state(session, asset, missing_path)
missing_state.is_missing = True
_make_cache_state(session, asset, active_path)
missing_ref = _make_reference(session, asset, missing_path, name="missing")
missing_ref.is_missing = True
_make_reference(session, asset, active_path, name="active")
session.commit()
# Only the missing path is passed in.
restored = restore_cache_states_by_paths(session, [missing_path])
restored = restore_references_by_paths(session, [missing_path])
session.commit()
# The test expects a return value of 1 and the flag cleared on that row.
assert restored == 1
state = session.query(AssetCacheState).filter_by(file_path=missing_path).one()
assert state.is_missing is False
ref = session.query(AssetReference).filter_by(file_path=missing_path).one()
assert ref.is_missing is False
# An empty path list is expected to return 0; no rows are set up for this case.
def test_empty_list_restores_nothing(self, session: Session):
restored = restore_cache_states_by_paths(session, [])
restored = restore_references_by_paths(session, [])
assert restored == 0
class TestMarkCacheStatesMissingOutsidePrefixes:
def test_marks_states_missing_outside_prefixes(self, session: Session, tmp_path):
class TestMarkReferencesMissingOutsidePrefixes:
def test_marks_references_missing_outside_prefixes(self, session: Session, tmp_path):
asset = _make_asset(session, "hash1")
valid_dir = tmp_path / "valid"
valid_dir.mkdir()
@@ -223,63 +230,58 @@ class TestMarkCacheStatesMissingOutsidePrefixes:
valid_path = str(valid_dir / "file.bin")
invalid_path = str(invalid_dir / "file.bin")
_make_cache_state(session, asset, valid_path)
_make_cache_state(session, asset, invalid_path)
_make_reference(session, asset, valid_path, name="valid")
_make_reference(session, asset, invalid_path, name="invalid")
session.commit()
marked = mark_cache_states_missing_outside_prefixes(session, [str(valid_dir)])
marked = mark_references_missing_outside_prefixes(session, [str(valid_dir)])
session.commit()
assert marked == 1
all_states = session.query(AssetCacheState).all()
assert len(all_states) == 2
all_refs = session.query(AssetReference).all()
assert len(all_refs) == 2
valid_state = next(s for s in all_states if s.file_path == valid_path)
invalid_state = next(s for s in all_states if s.file_path == invalid_path)
assert valid_state.is_missing is False
assert invalid_state.is_missing is True
valid_ref = next(r for r in all_refs if r.file_path == valid_path)
invalid_ref = next(r for r in all_refs if r.file_path == invalid_path)
assert valid_ref.is_missing is False
assert invalid_ref.is_missing is True
# With an empty prefix list the call is expected to return 0 even though one row exists.
# NOTE(review): rendered diff - each removed cache-state line is followed by its
# AssetReference replacement.
def test_empty_prefixes_marks_nothing(self, session: Session):
asset = _make_asset(session, "hash1")
_make_cache_state(session, asset, "/some/path.bin")
_make_reference(session, asset, "/some/path.bin")
session.commit()
marked = mark_cache_states_missing_outside_prefixes(session, [])
marked = mark_references_missing_outside_prefixes(session, [])
# Only the return value is checked; the row's is_missing flag is not re-read here.
assert marked == 0
# Tests for get_unreferenced_unhashed_asset_ids: builds four assets (no rows, an active
# row, only a missing row, and a hashed asset) and checks which ids come back.
# NOTE(review): rendered diff - the first comment/setup group is the pre-change
# cache-state version, the second group is the post-change AssetReference version.
class TestGetUnreferencedUnhashedAssetIds:
def test_returns_unreferenced_unhashed_assets(self, session: Session):
# Unhashed asset (hash=None) with no cache states
no_states = _make_asset(session, hash_val=None)
# Unhashed asset with active cache state (not unreferenced)
with_active_state = _make_asset(session, hash_val=None)
_make_cache_state(session, with_active_state, "/has/state.bin")
# Unhashed asset with only missing cache state (should be unreferenced)
with_missing_state = _make_asset(session, hash_val=None)
missing_state = _make_cache_state(session, with_missing_state, "/missing/state.bin")
missing_state.is_missing = True
# Unhashed asset (hash=None) with no references (no file_path)
no_refs = _make_asset(session, hash_val=None)
# Unhashed asset with active reference (not unreferenced)
with_active_ref = _make_asset(session, hash_val=None)
_make_reference(session, with_active_ref, "/has/ref.bin", name="has_ref")
# Unhashed asset with only missing reference (should be unreferenced)
with_missing_ref = _make_asset(session, hash_val=None)
missing_ref = _make_reference(session, with_missing_ref, "/missing/ref.bin", name="missing_ref")
missing_ref.is_missing = True
# Regular asset (hash not None) - should not be returned
# NOTE(review): this asset is not bound to a name, so nothing below asserts that
# its id is absent from the result - consider capturing it and asserting.
_make_asset(session, hash_val="blake3:regular")
session.commit()
unreferenced = get_unreferenced_unhashed_asset_ids(session)
# Membership checks only; the exact size of the result is not asserted.
assert no_states.id in unreferenced
assert with_missing_state.id in unreferenced
assert with_active_state.id not in unreferenced
assert no_refs.id in unreferenced
assert with_missing_ref.id in unreferenced
assert with_active_ref.id not in unreferenced
class TestDeleteAssetsByIds:
def test_deletes_assets_and_infos(self, session: Session):
def test_deletes_assets_and_references(self, session: Session):
asset = _make_asset(session, "hash1")
now = get_utc_now()
info = AssetInfo(
owner_id="", name="test", asset_id=asset.id,
created_at=now, updated_at=now, last_access_time=now
)
session.add(info)
_make_reference(session, asset, "/test/path.bin", name="test")
session.commit()
deleted = delete_assets_by_ids(session, [asset.id])
@@ -287,7 +289,7 @@ class TestDeleteAssetsByIds:
assert deleted == 1
assert session.query(Asset).count() == 0
assert session.query(AssetInfo).count() == 0
assert session.query(AssetReference).count() == 0
def test_empty_list_deletes_nothing(self, session: Session):
_make_asset(session, "hash1")
@@ -299,8 +301,8 @@ class TestDeleteAssetsByIds:
assert session.query(Asset).count() == 1
class TestGetCacheStatesForPrefixes:
def test_returns_states_matching_prefix(self, session: Session, tmp_path):
class TestGetReferencesForPrefixes:
def test_returns_references_matching_prefix(self, session: Session, tmp_path):
asset = _make_asset(session, "hash1")
dir1 = tmp_path / "dir1"
dir1.mkdir()
@@ -310,21 +312,21 @@ class TestGetCacheStatesForPrefixes:
path1 = str(dir1 / "file.bin")
path2 = str(dir2 / "file.bin")
_make_cache_state(session, asset, path1, mtime_ns=100)
_make_cache_state(session, asset, path2, mtime_ns=200)
_make_reference(session, asset, path1, name="file1", mtime_ns=100)
_make_reference(session, asset, path2, name="file2", mtime_ns=200)
session.commit()
rows = get_cache_states_for_prefixes(session, [str(dir1)])
rows = get_references_for_prefixes(session, [str(dir1)])
assert len(rows) == 1
assert rows[0].file_path == path1
# With an empty prefix list the query is expected to return [] even though one row exists.
# NOTE(review): rendered diff - each removed cache-state line is followed by its
# AssetReference replacement.
def test_empty_prefixes_returns_empty(self, session: Session):
asset = _make_asset(session, "hash1")
_make_cache_state(session, asset, "/some/path.bin")
_make_reference(session, asset, "/some/path.bin")
session.commit()
rows = get_cache_states_for_prefixes(session, [])
rows = get_references_for_prefixes(session, [])
# Compared against a list literal, so the result must equal an empty list.
assert rows == []
@@ -332,39 +334,39 @@ class TestGetCacheStatesForPrefixes:
# Tests for bulk_update_needs_verify, which is called with a session, a list of row ids
# and the flag value to set.
# NOTE(review): rendered diff - each removed cache-state line is followed by its
# AssetReference replacement.
class TestBulkSetNeedsVerify:
def test_sets_needs_verify_flag(self, session: Session):
asset = _make_asset(session, "hash1")
# Both rows start with needs_verify=False so a change to True is observable.
state1 = _make_cache_state(session, asset, "/path1.bin", needs_verify=False)
state2 = _make_cache_state(session, asset, "/path2.bin", needs_verify=False)
ref1 = _make_reference(session, asset, "/path1.bin", needs_verify=False)
ref2 = _make_reference(session, asset, "/path2.bin", needs_verify=False)
session.commit()
updated = bulk_update_needs_verify(session, [state1.id, state2.id], True)
updated = bulk_update_needs_verify(session, [ref1.id, ref2.id], True)
session.commit()
# The test expects the return value to equal the number of ids passed in.
assert updated == 2
# Reload the ORM objects from the database before checking the flag.
session.refresh(state1)
session.refresh(state2)
assert state1.needs_verify is True
assert state2.needs_verify is True
session.refresh(ref1)
session.refresh(ref2)
assert ref1.needs_verify is True
assert ref2.needs_verify is True
# An empty id list is expected to return 0.
def test_empty_list_updates_nothing(self, session: Session):
updated = bulk_update_needs_verify(session, [], True)
assert updated == 0
# Tests for delete_references_by_ids (pre-change name: delete_cache_states_by_ids).
# NOTE(review): rendered diff - each removed cache-state line is followed by its
# AssetReference replacement.
class TestDeleteCacheStatesByIds:
def test_deletes_states_by_id(self, session: Session):
class TestDeleteReferencesByIds:
def test_deletes_references_by_id(self, session: Session):
asset = _make_asset(session, "hash1")
# Two rows are created; only the first one's id is passed to the delete call.
state1 = _make_cache_state(session, asset, "/path1.bin")
_make_cache_state(session, asset, "/path2.bin")
ref1 = _make_reference(session, asset, "/path1.bin")
_make_reference(session, asset, "/path2.bin")
session.commit()
deleted = delete_cache_states_by_ids(session, [state1.id])
deleted = delete_references_by_ids(session, [ref1.id])
session.commit()
# One row reported deleted and exactly one row left in the table.
assert deleted == 1
assert session.query(AssetCacheState).count() == 1
assert session.query(AssetReference).count() == 1
# An empty id list is expected to return 0.
def test_empty_list_deletes_nothing(self, session: Session):
deleted = delete_cache_states_by_ids(session, [])
deleted = delete_references_by_ids(session, [])
assert deleted == 0
@@ -384,12 +386,7 @@ class TestDeleteOrphanedSeedAsset:
if create_asset:
asset = _make_asset(session, hash_val=None)
asset_id = asset.id
now = get_utc_now()
info = AssetInfo(
owner_id="", name="test", asset_id=asset.id,
created_at=now, updated_at=now, last_access_time=now
)
session.add(info)
_make_reference(session, asset, "/test/path.bin", name="test")
session.commit()
deleted = delete_orphaned_seed_asset(session, asset_id)
@@ -400,53 +397,87 @@ class TestDeleteOrphanedSeedAsset:
assert session.query(Asset).count() == expected_count
# Tests for bulk_insert_references_ignore_conflicts (pre-change name:
# bulk_insert_cache_states_ignore_conflicts), which takes a session and a list of row dicts.
# NOTE(review): rendered diff - the one-line row dicts are the pre-change cache-state
# rows; the multi-line dicts after them are the post-change AssetReference rows, which
# additionally carry name, created_at, updated_at and last_access_time.
class TestBulkInsertCacheStatesIgnoreConflicts:
def test_inserts_multiple_states(self, session: Session):
class TestBulkInsertReferencesIgnoreConflicts:
def test_inserts_multiple_references(self, session: Session):
asset = _make_asset(session, "hash1")
now = get_utc_now()
rows = [
{"asset_id": asset.id, "file_path": "/bulk1.bin", "mtime_ns": 100},
{"asset_id": asset.id, "file_path": "/bulk2.bin", "mtime_ns": 200},
{
"asset_id": asset.id,
"file_path": "/bulk1.bin",
"name": "bulk1",
"mtime_ns": 100,
"created_at": now,
"updated_at": now,
"last_access_time": now,
},
{
"asset_id": asset.id,
"file_path": "/bulk2.bin",
"name": "bulk2",
"mtime_ns": 200,
"created_at": now,
"updated_at": now,
"last_access_time": now,
},
]
bulk_insert_cache_states_ignore_conflicts(session, rows)
bulk_insert_references_ignore_conflicts(session, rows)
session.commit()
# Two distinct file paths in, two rows expected in the table.
assert session.query(AssetCacheState).count() == 2
assert session.query(AssetReference).count() == 2
def test_ignores_conflicts(self, session: Session):
asset = _make_asset(session, "hash1")
# Pre-existing row for /existing.bin with mtime_ns=100.
_make_cache_state(session, asset, "/existing.bin", mtime_ns=100)
_make_reference(session, asset, "/existing.bin", mtime_ns=100)
session.commit()
now = get_utc_now()
# First row reuses the existing file_path with a different mtime_ns (999);
# second row uses a new file_path.
rows = [
{"asset_id": asset.id, "file_path": "/existing.bin", "mtime_ns": 999},
{"asset_id": asset.id, "file_path": "/new.bin", "mtime_ns": 200},
{
"asset_id": asset.id,
"file_path": "/existing.bin",
"name": "existing",
"mtime_ns": 999,
"created_at": now,
"updated_at": now,
"last_access_time": now,
},
{
"asset_id": asset.id,
"file_path": "/new.bin",
"name": "new",
"mtime_ns": 200,
"created_at": now,
"updated_at": now,
"last_access_time": now,
},
]
bulk_insert_cache_states_ignore_conflicts(session, rows)
bulk_insert_references_ignore_conflicts(session, rows)
session.commit()
# Expect 2 rows total (existing + new), i.e. the conflicting row was not added.
assert session.query(AssetCacheState).count() == 2
existing = session.query(AssetCacheState).filter_by(file_path="/existing.bin").one()
assert session.query(AssetReference).count() == 2
existing = session.query(AssetReference).filter_by(file_path="/existing.bin").one()
assert existing.mtime_ns == 100 # Original value preserved
# An empty row list must leave the table empty.
def test_empty_list_is_noop(self, session: Session):
bulk_insert_cache_states_ignore_conflicts(session, [])
assert session.query(AssetCacheState).count() == 0
bulk_insert_references_ignore_conflicts(session, [])
assert session.query(AssetReference).count() == 0
class TestGetCacheStatesByPathsAndAssetIds:
class TestGetReferencesByPathsAndAssetIds:
# Each path is stored against the same asset id it is mapped to in the lookup dict,
# so both paths are expected back.
# NOTE(review): rendered diff - each removed cache-state line is followed by its
# AssetReference replacement.
def test_returns_matching_paths(self, session: Session):
asset1 = _make_asset(session, "hash1")
asset2 = _make_asset(session, "hash2")
_make_cache_state(session, asset1, "/path1.bin")
_make_cache_state(session, asset2, "/path2.bin")
_make_reference(session, asset1, "/path1.bin")
_make_reference(session, asset2, "/path2.bin")
session.commit()
# Maps file path -> asset id, mirroring the rows created above.
path_to_asset = {
"/path1.bin": asset1.id,
"/path2.bin": asset2.id,
}
winners = get_cache_states_by_paths_and_asset_ids(session, path_to_asset)
winners = get_references_by_paths_and_asset_ids(session, path_to_asset)
# Compared against a set literal, so the result is expected to be a set of paths.
assert winners == {"/path1.bin", "/path2.bin"}
@@ -454,15 +485,15 @@ class TestGetCacheStatesByPathsAndAssetIds:
asset1 = _make_asset(session, "hash1")
asset2 = _make_asset(session, "hash2")
_make_cache_state(session, asset1, "/path1.bin")
_make_reference(session, asset1, "/path1.bin")
session.commit()
# Path exists but with different asset_id
path_to_asset = {"/path1.bin": asset2.id}
winners = get_cache_states_by_paths_and_asset_ids(session, path_to_asset)
winners = get_references_by_paths_and_asset_ids(session, path_to_asset)
assert winners == set()
# An empty path->asset_id mapping is expected to produce an empty set.
# NOTE(review): rendered diff - the removed cache-state call is followed by its
# AssetReference replacement.
def test_empty_dict_returns_empty(self, session: Session):
winners = get_cache_states_by_paths_and_asset_ids(session, {})
winners = get_references_by_paths_and_asset_ids(session, {})
assert winners == set()

View File

@@ -1,10 +1,10 @@
"""Tests for metadata filtering logic in asset_info queries."""
"""Tests for metadata filtering logic in asset_reference queries."""
import pytest
from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetInfo, AssetInfoMeta
from app.assets.database.queries import list_asset_infos_page
from app.assets.database.queries.asset_info import convert_metadata_to_rows
from app.assets.database.models import Asset, AssetReference, AssetReferenceMeta
from app.assets.database.queries import list_references_page
from app.assets.database.queries.asset_reference import convert_metadata_to_rows
from app.assets.helpers import get_utc_now
@@ -15,14 +15,14 @@ def _make_asset(session: Session, hash_val: str) -> Asset:
return asset
def _make_asset_info(
def _make_reference(
session: Session,
asset: Asset,
name: str,
metadata: dict | None = None,
) -> AssetInfo:
) -> AssetReference:
now = get_utc_now()
info = AssetInfo(
ref = AssetReference(
owner_id="",
name=name,
asset_id=asset.id,
@@ -31,14 +31,14 @@ def _make_asset_info(
updated_at=now,
last_access_time=now,
)
session.add(info)
session.add(ref)
session.flush()
if metadata:
for key, val in metadata.items():
for row in convert_metadata_to_rows(key, val):
meta_row = AssetInfoMeta(
asset_info_id=info.id,
meta_row = AssetReferenceMeta(
asset_reference_id=ref.id,
key=row["key"],
ordinal=row.get("ordinal", 0),
val_str=row.get("val_str"),
@@ -49,7 +49,7 @@ def _make_asset_info(
session.add(meta_row)
session.flush()
return info
return ref
class TestMetadataFilterByType:
@@ -75,15 +75,15 @@ class TestMetadataFilterByType:
self, session: Session, match_meta, nomatch_meta, filter_key, filter_val
):
asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, "match", match_meta)
_make_asset_info(session, asset, "nomatch", nomatch_meta)
_make_reference(session, asset, "match", match_meta)
_make_reference(session, asset, "nomatch", nomatch_meta)
session.commit()
infos, _, total = list_asset_infos_page(
refs, _, total = list_references_page(
session, metadata_filter={filter_key: filter_val}
)
assert total == 1
assert infos[0].name == "match"
assert refs[0].name == "match"
@pytest.mark.parametrize(
"stored_meta,filter_key,filter_val",
@@ -101,10 +101,10 @@ class TestMetadataFilterByType:
self, session: Session, stored_meta, filter_key, filter_val
):
asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, "item", stored_meta)
_make_reference(session, asset, "item", stored_meta)
session.commit()
infos, _, total = list_asset_infos_page(
refs, _, total = list_references_page(
session, metadata_filter={filter_key: filter_val}
)
assert total == 0
@@ -127,13 +127,13 @@ class TestMetadataFilterNull:
self, session: Session, match_name, match_meta, nomatch_name, nomatch_meta, filter_key
):
asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, match_name, match_meta)
_make_asset_info(session, asset, nomatch_name, nomatch_meta)
_make_reference(session, asset, match_name, match_meta)
_make_reference(session, asset, nomatch_name, nomatch_meta)
session.commit()
infos, _, total = list_asset_infos_page(session, metadata_filter={filter_key: None})
refs, _, total = list_references_page(session, metadata_filter={filter_key: None})
assert total == 1
assert infos[0].name == match_name
assert refs[0].name == match_name
class TestMetadataFilterList:
@@ -142,14 +142,14 @@ class TestMetadataFilterList:
# NOTE(review): rendered diff - each removed asset-info line is followed by its
# AssetReference replacement.
def test_filter_by_list_matches_any(self, session: Session):
"""List values should match ANY of the values (OR)."""
asset = _make_asset(session, "hash1")
# Three rows on one asset, each with a different "category" metadata value.
_make_asset_info(session, asset, "cat_a", {"category": "a"})
_make_asset_info(session, asset, "cat_b", {"category": "b"})
_make_asset_info(session, asset, "cat_c", {"category": "c"})
_make_reference(session, asset, "cat_a", {"category": "a"})
_make_reference(session, asset, "cat_b", {"category": "b"})
_make_reference(session, asset, "cat_c", {"category": "c"})
session.commit()
# The result unpacks as a 3-tuple; the middle element is unused in this test.
infos, _, total = list_asset_infos_page(session, metadata_filter={"category": ["a", "b"]})
refs, _, total = list_references_page(session, metadata_filter={"category": ["a", "b"]})
assert total == 2
# Names are compared as a set, so result ordering does not matter.
names = {i.name for i in infos}
names = {r.name for r in refs}
assert names == {"cat_a", "cat_b"}
@@ -159,16 +159,16 @@ class TestMetadataFilterMultipleKeys:
# NOTE(review): rendered diff - each removed asset-info line is followed by its
# AssetReference replacement.
def test_multiple_keys_must_all_match(self, session: Session):
"""Multiple keys should ALL match (AND)."""
asset = _make_asset(session, "hash1")
# One row matches both keys; the other two each differ in exactly one key.
_make_asset_info(session, asset, "match", {"type": "model", "version": 2})
_make_asset_info(session, asset, "wrong_type", {"type": "config", "version": 2})
_make_asset_info(session, asset, "wrong_version", {"type": "model", "version": 1})
_make_reference(session, asset, "match", {"type": "model", "version": 2})
_make_reference(session, asset, "wrong_type", {"type": "config", "version": 2})
_make_reference(session, asset, "wrong_version", {"type": "model", "version": 1})
session.commit()
infos, _, total = list_asset_infos_page(
refs, _, total = list_references_page(
session, metadata_filter={"type": "model", "version": 2}
)
# Only the row matching both keys may be returned.
assert total == 1
assert infos[0].name == "match"
assert refs[0].name == "match"
class TestMetadataFilterEmptyDict:
@@ -176,9 +176,9 @@ class TestMetadataFilterEmptyDict:
# An empty metadata_filter dict is expected to apply no filtering: both rows are counted,
# including the one created with empty metadata.
# NOTE(review): rendered diff - each removed asset-info line is followed by its
# AssetReference replacement.
def test_empty_filter_returns_all(self, session: Session):
asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, "a", {"key": "val"})
_make_asset_info(session, asset, "b", {})
_make_reference(session, asset, "a", {"key": "val"})
_make_reference(session, asset, "b", {})
session.commit()
infos, _, total = list_asset_infos_page(session, metadata_filter={})
refs, _, total = list_references_page(session, metadata_filter={})
# Only the total is asserted; the returned rows themselves are not inspected.
assert total == 2

View File

@@ -1,13 +1,13 @@
import pytest
from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetInfo, AssetInfoTag, AssetInfoMeta, Tag
from app.assets.database.models import Asset, AssetReference, AssetReferenceTag, AssetReferenceMeta, Tag
from app.assets.database.queries import (
ensure_tags_exist,
get_asset_tags,
set_asset_info_tags,
add_tags_to_asset_info,
remove_tags_from_asset_info,
get_reference_tags,
set_reference_tags,
add_tags_to_reference,
remove_tags_from_reference,
add_missing_tag_for_asset_id,
remove_missing_tag_for_asset_id,
list_tags_with_usage,
@@ -23,9 +23,9 @@ def _make_asset(session: Session, hash_val: str | None = None) -> Asset:
return asset
def _make_asset_info(session: Session, asset: Asset, name: str = "test", owner_id: str = "") -> AssetInfo:
def _make_reference(session: Session, asset: Asset, name: str = "test", owner_id: str = "") -> AssetReference:
now = get_utc_now()
info = AssetInfo(
ref = AssetReference(
owner_id=owner_id,
name=name,
asset_id=asset.id,
@@ -33,9 +33,9 @@ def _make_asset_info(session: Session, asset: Asset, name: str = "test", owner_i
updated_at=now,
last_access_time=now,
)
session.add(info)
session.add(ref)
session.flush()
return info
return ref
class TestEnsureTagsExist:
@@ -73,35 +73,35 @@ class TestEnsureTagsExist:
assert tag.tag_type == "system"
# Tests for get_reference_tags (pre-change name: get_asset_tags), which is called with a
# session and the id of the row whose tags are wanted.
# NOTE(review): rendered diff - each removed asset-info line is followed by its
# AssetReference replacement; the keyword changes from asset_info_id to reference_id.
class TestGetAssetTags:
class TestGetReferenceTags:
def test_returns_empty_for_no_tags(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
tags = get_asset_tags(session, asset_info_id=info.id)
tags = get_reference_tags(session, reference_id=ref.id)
# A row with no tag links is expected to yield an empty list.
assert tags == []
def test_returns_tags_for_asset(self, session: Session):
def test_returns_tags_for_reference(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
ensure_tags_exist(session, ["tag1", "tag2"])
# Link rows are inserted directly through the ORM model rather than via a tag helper.
session.add_all([
AssetInfoTag(asset_info_id=info.id, tag_name="tag1", origin="manual", added_at=get_utc_now()),
AssetInfoTag(asset_info_id=info.id, tag_name="tag2", origin="manual", added_at=get_utc_now()),
AssetReferenceTag(asset_reference_id=ref.id, tag_name="tag1", origin="manual", added_at=get_utc_now()),
AssetReferenceTag(asset_reference_id=ref.id, tag_name="tag2", origin="manual", added_at=get_utc_now()),
])
session.flush()
tags = get_asset_tags(session, asset_info_id=info.id)
tags = get_reference_tags(session, reference_id=ref.id)
# Compared as a set, so the order of returned tag names is not checked.
assert set(tags) == {"tag1", "tag2"}
class TestSetAssetInfoTags:
class TestSetReferenceTags:
def test_adds_new_tags(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
result = set_asset_info_tags(session, asset_info_id=info.id, tags=["a", "b"])
result = set_reference_tags(session, reference_id=ref.id, tags=["a", "b"])
session.commit()
assert set(result["added"]) == {"a", "b"}
@@ -110,10 +110,10 @@ class TestSetAssetInfoTags:
def test_removes_old_tags(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
set_asset_info_tags(session, asset_info_id=info.id, tags=["a", "b", "c"])
result = set_asset_info_tags(session, asset_info_id=info.id, tags=["a"])
set_reference_tags(session, reference_id=ref.id, tags=["a", "b", "c"])
result = set_reference_tags(session, reference_id=ref.id, tags=["a"])
session.commit()
assert result["added"] == []
@@ -122,10 +122,10 @@ class TestSetAssetInfoTags:
def test_replaces_tags(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
set_asset_info_tags(session, asset_info_id=info.id, tags=["a", "b"])
result = set_asset_info_tags(session, asset_info_id=info.id, tags=["b", "c"])
set_reference_tags(session, reference_id=ref.id, tags=["a", "b"])
result = set_reference_tags(session, reference_id=ref.id, tags=["b", "c"])
session.commit()
assert result["added"] == ["c"]
@@ -133,12 +133,12 @@ class TestSetAssetInfoTags:
assert set(result["total"]) == {"b", "c"}
class TestAddTagsToAssetInfo:
class TestAddTagsToReference:
def test_adds_tags(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
result = add_tags_to_asset_info(session, asset_info_id=info.id, tags=["x", "y"])
result = add_tags_to_reference(session, reference_id=ref.id, tags=["x", "y"])
session.commit()
assert set(result["added"]) == {"x", "y"}
@@ -146,27 +146,27 @@ class TestAddTagsToAssetInfo:
# Adding a tag that is already linked should report it under "already_present" and
# report only the genuinely new tag under "added".
# NOTE(review): rendered diff - each removed asset-info line is followed by its
# AssetReference replacement.
def test_reports_already_present(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
# First call links "x"; second call passes "x" again together with a new tag "y".
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["x"])
result = add_tags_to_asset_info(session, asset_info_id=info.id, tags=["x", "y"])
add_tags_to_reference(session, reference_id=ref.id, tags=["x"])
result = add_tags_to_reference(session, reference_id=ref.id, tags=["x", "y"])
session.commit()
assert result["added"] == ["y"]
assert result["already_present"] == ["x"]
# Adding tags for an id with no matching row is expected to raise ValueError with a
# message matching "not found".
# NOTE(review): rendered diff - each removed asset-info line is followed by its
# AssetReference replacement.
def test_raises_for_missing_asset_info(self, session: Session):
def test_raises_for_missing_reference(self, session: Session):
with pytest.raises(ValueError, match="not found"):
add_tags_to_asset_info(session, asset_info_id="nonexistent", tags=["x"])
add_tags_to_reference(session, reference_id="nonexistent", tags=["x"])
class TestRemoveTagsFromAssetInfo:
class TestRemoveTagsFromReference:
def test_removes_tags(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["a", "b", "c"])
result = remove_tags_from_asset_info(session, asset_info_id=info.id, tags=["a", "b"])
add_tags_to_reference(session, reference_id=ref.id, tags=["a", "b", "c"])
result = remove_tags_from_reference(session, reference_id=ref.id, tags=["a", "b"])
session.commit()
assert set(result["removed"]) == {"a", "b"}
@@ -175,54 +175,54 @@ class TestRemoveTagsFromAssetInfo:
# Removing a tag that was never linked should report it under "not_present" and
# report only the tag that was actually linked under "removed".
# NOTE(review): rendered diff - each removed asset-info line is followed by its
# AssetReference replacement.
def test_reports_not_present(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
# Only "a" is linked; the removal request also names "x", which was never added.
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["a"])
result = remove_tags_from_asset_info(session, asset_info_id=info.id, tags=["a", "x"])
add_tags_to_reference(session, reference_id=ref.id, tags=["a"])
result = remove_tags_from_reference(session, reference_id=ref.id, tags=["a", "x"])
session.commit()
assert result["removed"] == ["a"]
assert result["not_present"] == ["x"]
# Removing tags for an id with no matching row is expected to raise ValueError with a
# message matching "not found".
# NOTE(review): rendered diff - each removed asset-info line is followed by its
# AssetReference replacement.
def test_raises_for_missing_asset_info(self, session: Session):
def test_raises_for_missing_reference(self, session: Session):
with pytest.raises(ValueError, match="not found"):
remove_tags_from_asset_info(session, asset_info_id="nonexistent", tags=["x"])
remove_tags_from_reference(session, reference_id="nonexistent", tags=["x"])
# Tests for add_missing_tag_for_asset_id / remove_missing_tag_for_asset_id. Both helpers
# are called with an asset id, while the resulting "missing" tag is checked on the
# row created for that asset.
# NOTE(review): rendered diff - each removed asset-info line is followed by its
# AssetReference replacement.
class TestMissingTagFunctions:
def test_add_missing_tag_for_asset_id(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
# The "missing" tag is created up front with tag_type="system".
ensure_tags_exist(session, ["missing"], tag_type="system")
add_missing_tag_for_asset_id(session, asset_id=asset.id)
session.commit()
tags = get_asset_tags(session, asset_info_id=info.id)
tags = get_reference_tags(session, reference_id=ref.id)
assert "missing" in tags
def test_add_missing_tag_is_idempotent(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
ensure_tags_exist(session, ["missing"], tag_type="system")
# Called twice on purpose: the second call must not create a duplicate link.
add_missing_tag_for_asset_id(session, asset_id=asset.id)
add_missing_tag_for_asset_id(session, asset_id=asset.id)
session.commit()
# Count the link rows directly instead of going through the tag-listing helper.
links = session.query(AssetInfoTag).filter_by(asset_info_id=info.id, tag_name="missing").all()
links = session.query(AssetReferenceTag).filter_by(asset_reference_id=ref.id, tag_name="missing").all()
assert len(links) == 1
def test_remove_missing_tag_for_asset_id(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
ensure_tags_exist(session, ["missing"], tag_type="system")
# Add then remove within the same transaction; the tag must be gone afterwards.
add_missing_tag_for_asset_id(session, asset_id=asset.id)
remove_missing_tag_for_asset_id(session, asset_id=asset.id)
session.commit()
tags = get_asset_tags(session, asset_info_id=info.id)
tags = get_reference_tags(session, reference_id=ref.id)
assert "missing" not in tags
@@ -231,8 +231,8 @@ class TestListTagsWithUsage:
ensure_tags_exist(session, ["used", "unused"])
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["used"])
ref = _make_reference(session, asset)
add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
session.commit()
rows, total = list_tags_with_usage(session)
@@ -246,8 +246,8 @@ class TestListTagsWithUsage:
ensure_tags_exist(session, ["used", "unused"])
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["used"])
ref = _make_reference(session, asset)
add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
session.commit()
rows, total = list_tags_with_usage(session, include_zero=False)
@@ -278,11 +278,11 @@ class TestListTagsWithUsage:
ensure_tags_exist(session, ["shared-tag", "owner-tag"])
asset = _make_asset(session, "hash1")
shared_info = _make_asset_info(session, asset, name="shared", owner_id="")
owner_info = _make_asset_info(session, asset, name="owned", owner_id="user1")
shared_ref = _make_reference(session, asset, name="shared", owner_id="")
owner_ref = _make_reference(session, asset, name="owned", owner_id="user1")
add_tags_to_asset_info(session, asset_info_id=shared_info.id, tags=["shared-tag"])
add_tags_to_asset_info(session, asset_info_id=owner_info.id, tags=["owner-tag"])
add_tags_to_reference(session, reference_id=shared_ref.id, tags=["shared-tag"])
add_tags_to_reference(session, reference_id=owner_ref.id, tags=["owner-tag"])
session.commit()
# Empty owner sees only shared
@@ -301,29 +301,29 @@ class TestListTagsWithUsage:
class TestBulkInsertTagsAndMeta:
# Inserts two tag-link rows through bulk_insert_tags_and_meta (with no meta rows) and
# checks that both tags are then reported for the row.
# NOTE(review): rendered diff - each removed asset-info line is followed by its
# AssetReference replacement; the row key changes from asset_info_id to asset_reference_id.
def test_inserts_tags(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
# The tags are created before the link rows that refer to them by name.
ensure_tags_exist(session, ["bulk-tag1", "bulk-tag2"])
session.commit()
now = get_utc_now()
tag_rows = [
{"asset_info_id": info.id, "tag_name": "bulk-tag1", "origin": "manual", "added_at": now},
{"asset_info_id": info.id, "tag_name": "bulk-tag2", "origin": "manual", "added_at": now},
{"asset_reference_id": ref.id, "tag_name": "bulk-tag1", "origin": "manual", "added_at": now},
{"asset_reference_id": ref.id, "tag_name": "bulk-tag2", "origin": "manual", "added_at": now},
]
bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=[])
session.commit()
tags = get_asset_tags(session, asset_info_id=info.id)
tags = get_reference_tags(session, reference_id=ref.id)
# Compared as a set, so the order of returned tag names is not checked.
assert set(tags) == {"bulk-tag1", "bulk-tag2"}
def test_inserts_meta(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
session.commit()
meta_rows = [
{
"asset_info_id": info.id,
"asset_reference_id": ref.id,
"key": "meta-key",
"ordinal": 0,
"val_str": "meta-value",
@@ -335,32 +335,32 @@ class TestBulkInsertTagsAndMeta:
bulk_insert_tags_and_meta(session, tag_rows=[], meta_rows=meta_rows)
session.commit()
meta = session.query(AssetInfoMeta).filter_by(asset_info_id=info.id).all()
meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
assert len(meta) == 1
assert meta[0].key == "meta-key"
assert meta[0].val_str == "meta-value"
# Bulk-inserting a tag link that already exists must neither duplicate the link nor
# overwrite its origin.
# NOTE(review): rendered diff - each removed asset-info line is followed by its
# AssetReference replacement.
def test_ignores_conflicts(self, session: Session):
asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
ensure_tags_exist(session, ["existing-tag"])
# Create the link once through the regular add-tags helper.
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["existing-tag"])
add_tags_to_reference(session, reference_id=ref.id, tags=["existing-tag"])
session.commit()
now = get_utc_now()
# Same row id and tag name as the existing link, but with origin="duplicate".
tag_rows = [
{"asset_info_id": info.id, "tag_name": "existing-tag", "origin": "duplicate", "added_at": now},
{"asset_reference_id": ref.id, "tag_name": "existing-tag", "origin": "duplicate", "added_at": now},
]
bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=[])
session.commit()
# Should still have only one tag link
links = session.query(AssetInfoTag).filter_by(asset_info_id=info.id, tag_name="existing-tag").all()
links = session.query(AssetReferenceTag).filter_by(asset_reference_id=ref.id, tag_name="existing-tag").all()
assert len(links) == 1
# Origin should be original, not overwritten
assert links[0].origin == "manual"
# Calling bulk_insert_tags_and_meta with empty tag and meta lists must leave both the
# tag-link table and the metadata table empty.
# NOTE(review): rendered diff - each removed asset-info assertion is followed by its
# AssetReference replacement.
def test_empty_lists_is_noop(self, session: Session):
bulk_insert_tags_and_meta(session, tag_rows=[], meta_rows=[])
assert session.query(AssetInfoTag).count() == 0
assert session.query(AssetInfoMeta).count() == 0
assert session.query(AssetReferenceTag).count() == 0
assert session.query(AssetReferenceMeta).count() == 0

View File

@@ -9,6 +9,12 @@ from sqlalchemy.orm import Session
from app.assets.database.models import Base
@pytest.fixture(autouse=True)
def autoclean_unit_test_assets():
    """Override parent autouse fixture - service unit tests don't need server cleanup.

    The body is a bare ``yield``: no setup runs before the test and no
    teardown runs after it.
    """
    # Nothing is handed to the test and nothing is cleaned up afterwards.
    yield
@pytest.fixture
def db_engine():
"""In-memory SQLite engine for fast unit tests."""

View File

@@ -2,8 +2,8 @@
import pytest
from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetInfo
from app.assets.database.queries import ensure_tags_exist, add_tags_to_asset_info
from app.assets.database.models import Asset, AssetReference
from app.assets.database.queries import ensure_tags_exist, add_tags_to_reference
from app.assets.helpers import get_utc_now
from app.assets.services import (
get_asset_detail,
@@ -20,14 +20,14 @@ def _make_asset(session: Session, hash_val: str = "blake3:test", size: int = 102
return asset
def _make_asset_info(
def _make_reference(
session: Session,
asset: Asset,
name: str = "test",
owner_id: str = "",
) -> AssetInfo:
) -> AssetReference:
now = get_utc_now()
info = AssetInfo(
ref = AssetReference(
owner_id=owner_id,
name=name,
asset_id=asset.id,
@@ -35,70 +35,70 @@ def _make_asset_info(
updated_at=now,
last_access_time=now,
)
session.add(info)
session.add(ref)
session.flush()
return info
return ref
class TestGetAssetDetail:
def test_returns_none_for_nonexistent(self, mock_create_session):
result = get_asset_detail(asset_info_id="nonexistent")
result = get_asset_detail(reference_id="nonexistent")
assert result is None
def test_returns_asset_with_tags(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, name="test.bin")
ref = _make_reference(session, asset, name="test.bin")
ensure_tags_exist(session, ["alpha", "beta"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["alpha", "beta"])
add_tags_to_reference(session, reference_id=ref.id, tags=["alpha", "beta"])
session.commit()
result = get_asset_detail(asset_info_id=info.id)
result = get_asset_detail(reference_id=ref.id)
assert result is not None
assert result.info.id == info.id
assert result.ref.id == ref.id
assert result.asset.hash == asset.hash
assert set(result.tags) == {"alpha", "beta"}
def test_respects_owner_visibility(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1")
ref = _make_reference(session, asset, owner_id="user1")
session.commit()
# Wrong owner cannot see
result = get_asset_detail(asset_info_id=info.id, owner_id="user2")
result = get_asset_detail(reference_id=ref.id, owner_id="user2")
assert result is None
# Correct owner can see
result = get_asset_detail(asset_info_id=info.id, owner_id="user1")
result = get_asset_detail(reference_id=ref.id, owner_id="user1")
assert result is not None
class TestUpdateAssetMetadata:
def test_updates_name(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, name="old_name.bin")
info_id = info.id
ref = _make_reference(session, asset, name="old_name.bin")
ref_id = ref.id
session.commit()
update_asset_metadata(
asset_info_id=info_id,
reference_id=ref_id,
name="new_name.bin",
)
# Verify by re-fetching from DB
session.expire_all()
updated_info = session.get(AssetInfo, info_id)
assert updated_info.name == "new_name.bin"
updated_ref = session.get(AssetReference, ref_id)
assert updated_ref.name == "new_name.bin"
def test_updates_tags(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
ensure_tags_exist(session, ["old"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["old"])
add_tags_to_reference(session, reference_id=ref.id, tags=["old"])
session.commit()
result = update_asset_metadata(
asset_info_id=info.id,
reference_id=ref.id,
tags=["new1", "new2"],
)
@@ -107,84 +107,84 @@ class TestUpdateAssetMetadata:
def test_updates_user_metadata(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
info_id = info.id
ref = _make_reference(session, asset)
ref_id = ref.id
session.commit()
update_asset_metadata(
asset_info_id=info_id,
reference_id=ref_id,
user_metadata={"key": "value", "num": 42},
)
# Verify by re-fetching from DB
session.expire_all()
updated_info = session.get(AssetInfo, info_id)
assert updated_info.user_metadata["key"] == "value"
assert updated_info.user_metadata["num"] == 42
updated_ref = session.get(AssetReference, ref_id)
assert updated_ref.user_metadata["key"] == "value"
assert updated_ref.user_metadata["num"] == 42
def test_raises_for_nonexistent(self, mock_create_session):
with pytest.raises(ValueError, match="not found"):
update_asset_metadata(asset_info_id="nonexistent", name="fail")
update_asset_metadata(reference_id="nonexistent", name="fail")
def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1")
ref = _make_reference(session, asset, owner_id="user1")
session.commit()
with pytest.raises(PermissionError, match="not owner"):
update_asset_metadata(
asset_info_id=info.id,
reference_id=ref.id,
name="new",
owner_id="user2",
)
class TestDeleteAssetReference:
def test_deletes_asset_info(self, mock_create_session, session: Session):
def test_deletes_reference(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
info_id = info.id
ref = _make_reference(session, asset)
ref_id = ref.id
session.commit()
result = delete_asset_reference(
asset_info_id=info_id,
reference_id=ref_id,
owner_id="",
delete_content_if_orphan=False,
)
assert result is True
assert session.get(AssetInfo, info_id) is None
assert session.get(AssetReference, ref_id) is None
def test_returns_false_for_nonexistent(self, mock_create_session):
result = delete_asset_reference(
asset_info_id="nonexistent",
reference_id="nonexistent",
owner_id="",
)
assert result is False
def test_returns_false_for_wrong_owner(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1")
info_id = info.id
ref = _make_reference(session, asset, owner_id="user1")
ref_id = ref.id
session.commit()
result = delete_asset_reference(
asset_info_id=info_id,
reference_id=ref_id,
owner_id="user2",
)
assert result is False
assert session.get(AssetInfo, info_id) is not None
assert session.get(AssetReference, ref_id) is not None
def test_keeps_asset_if_other_infos_exist(self, mock_create_session, session: Session):
def test_keeps_asset_if_other_references_exist(self, mock_create_session, session: Session):
asset = _make_asset(session)
info1 = _make_asset_info(session, asset, name="info1")
_make_asset_info(session, asset, name="info2") # Second info keeps asset alive
ref1 = _make_reference(session, asset, name="ref1")
_make_reference(session, asset, name="ref2") # Second ref keeps asset alive
asset_id = asset.id
session.commit()
delete_asset_reference(
asset_info_id=info1.id,
reference_id=ref1.id,
owner_id="",
delete_content_if_orphan=True,
)
@@ -194,19 +194,19 @@ class TestDeleteAssetReference:
def test_deletes_orphaned_asset(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
asset_id = asset.id
info_id = info.id
ref_id = ref.id
session.commit()
delete_asset_reference(
asset_info_id=info_id,
reference_id=ref_id,
owner_id="",
delete_content_if_orphan=True,
)
# Both info and asset should be gone
assert session.get(AssetInfo, info_id) is None
# Both ref and asset should be gone
assert session.get(AssetReference, ref_id) is None
assert session.get(Asset, asset_id) is None
@@ -214,51 +214,51 @@ class TestSetAssetPreview:
def test_sets_preview(self, mock_create_session, session: Session):
asset = _make_asset(session, hash_val="blake3:main")
preview_asset = _make_asset(session, hash_val="blake3:preview")
info = _make_asset_info(session, asset)
info_id = info.id
ref = _make_reference(session, asset)
ref_id = ref.id
preview_id = preview_asset.id
session.commit()
set_asset_preview(
asset_info_id=info_id,
reference_id=ref_id,
preview_asset_id=preview_id,
)
# Verify by re-fetching from DB
session.expire_all()
updated_info = session.get(AssetInfo, info_id)
assert updated_info.preview_id == preview_id
updated_ref = session.get(AssetReference, ref_id)
assert updated_ref.preview_id == preview_id
def test_clears_preview(self, mock_create_session, session: Session):
asset = _make_asset(session)
preview_asset = _make_asset(session, hash_val="blake3:preview")
info = _make_asset_info(session, asset)
info.preview_id = preview_asset.id
info_id = info.id
ref = _make_reference(session, asset)
ref.preview_id = preview_asset.id
ref_id = ref.id
session.commit()
set_asset_preview(
asset_info_id=info_id,
reference_id=ref_id,
preview_asset_id=None,
)
# Verify by re-fetching from DB
session.expire_all()
updated_info = session.get(AssetInfo, info_id)
assert updated_info.preview_id is None
updated_ref = session.get(AssetReference, ref_id)
assert updated_ref.preview_id is None
def test_raises_for_nonexistent_info(self, mock_create_session):
def test_raises_for_nonexistent_ref(self, mock_create_session):
with pytest.raises(ValueError, match="not found"):
set_asset_preview(asset_info_id="nonexistent")
set_asset_preview(reference_id="nonexistent")
def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1")
ref = _make_reference(session, asset, owner_id="user1")
session.commit()
with pytest.raises(PermissionError, match="not owner"):
set_asset_preview(
asset_info_id=info.id,
reference_id=ref.id,
preview_asset_id=None,
owner_id="user2",
)

View File

@@ -4,7 +4,7 @@ from pathlib import Path
from sqlalchemy.orm import Session
from app.assets.database.models import Asset
from app.assets.database.models import Asset, AssetReference
from app.assets.services.bulk_ingest import SeedAssetSpec, batch_insert_seed_assets
@@ -30,7 +30,7 @@ class TestBatchInsertSeedAssets:
result = batch_insert_seed_assets(session, specs=specs, owner_id="")
assert result.inserted_infos == 1
assert result.inserted_refs == 1
# Verify Asset has mime_type populated
assets = session.query(Asset).all()
@@ -58,7 +58,7 @@ class TestBatchInsertSeedAssets:
result = batch_insert_seed_assets(session, specs=specs, owner_id="")
assert result.inserted_infos == 1
assert result.inserted_refs == 1
assets = session.query(Asset).all()
assert len(assets) == 1
@@ -93,13 +93,12 @@ class TestBatchInsertSeedAssets:
result = batch_insert_seed_assets(session, specs=specs, owner_id="")
assert result.inserted_infos == len(test_cases)
assert result.inserted_refs == len(test_cases)
for filename, expected_mime in test_cases:
from app.assets.database.models import AssetInfo
info = session.query(AssetInfo).filter_by(name=filename).first()
assert info is not None
asset = session.query(Asset).filter_by(id=info.asset_id).first()
ref = session.query(AssetReference).filter_by(name=filename).first()
assert ref is not None
asset = session.query(Asset).filter_by(id=ref.asset_id).first()
assert asset.mime_type == expected_mime, f"Expected {expected_mime} for {filename}, got {asset.mime_type}"

View File

@@ -0,0 +1,253 @@
"""Tests for asset enrichment (mime_type and hash population)."""
from pathlib import Path
from unittest.mock import patch
import pytest
from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetReference
from app.assets.scanner import (
ENRICHMENT_HASHED,
ENRICHMENT_METADATA,
ENRICHMENT_STUB,
enrich_asset,
)
def _create_stub_asset(
    session: Session,
    file_path: str,
    asset_id: str = "test-asset-id",
    reference_id: str = "test-ref-id",
    name: str | None = None,
) -> tuple[Asset, AssetReference]:
    """Insert an un-enriched Asset and a single AssetReference pointing at it.

    The asset is stored without a hash or mime type, and the reference is
    created at ``ENRICHMENT_STUB`` level for ``file_path``. Both rows are
    flushed but not committed; committing is left to the caller.

    Args:
        session: Session the rows are added to.
        file_path: Path recorded on the reference.
        asset_id: Primary key given to the asset.
        reference_id: Primary key given to the reference.
        name: Reference name; falls back to ``test-asset-<asset_id>`` when
            omitted or empty.

    Returns:
        The ``(asset, reference)`` pair that was added to ``session``.
    """
    stub = Asset(id=asset_id, hash=None, size_bytes=100, mime_type=None)
    session.add(stub)
    # Flush the asset first so it exists before the reference that points at it.
    session.flush()

    display_name = name if name else f"test-asset-{asset_id}"
    reference = AssetReference(
        id=reference_id,
        asset_id=asset_id,
        name=display_name,
        owner_id="system",
        file_path=file_path,
        mtime_ns=1234567890000000000,
        enrichment_level=ENRICHMENT_STUB,
    )
    session.add(reference)
    session.flush()

    return stub, reference
class TestEnrichAsset:
    """Tests for ``enrich_asset`` run against stub assets in the test database."""

    @staticmethod
    def _patch_scanner_session(db_engine):
        """Build a patcher that points the scanner's ``create_session`` at ``db_engine``.

        Each call to the patched ``create_session`` returns a context manager
        that opens a new ``Session`` bound to ``db_engine``. Tests use the
        returned patcher in a ``with`` statement around ``enrich_asset``.
        """
        from contextlib import contextmanager

        @contextmanager
        def _create_session():
            with Session(db_engine) as sess:
                yield sess

        # Extra keyword arguments to patch() configure the replacement mock, so
        # calling the mocked create_session() invokes _create_session().
        return patch(
            "app.assets.scanner.create_session", side_effect=_create_session
        )

    def test_extracts_mime_type_and_updates_asset(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify mime_type is written to the Asset table during enrichment."""
        file_path = temp_dir / "model.safetensors"
        file_path.write_bytes(b"\x00" * 100)

        asset, ref = _create_stub_asset(
            session, str(file_path), "asset-1", "ref-1"
        )
        session.commit()

        with self._patch_scanner_session(db_engine):
            new_level = enrich_asset(
                file_path=str(file_path),
                reference_id=ref.id,
                asset_id=asset.id,
                extract_metadata=True,
                compute_hash=False,
            )

        assert new_level == ENRICHMENT_METADATA

        # Drop cached state so the assertions read what enrichment wrote.
        session.expire_all()
        updated_asset = session.get(Asset, "asset-1")
        assert updated_asset is not None
        assert updated_asset.mime_type == "application/safetensors"

    def test_computes_hash_and_updates_asset(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify hash is written to the Asset table during enrichment."""
        file_path = temp_dir / "data.bin"
        file_path.write_bytes(b"test content for hashing")

        asset, ref = _create_stub_asset(
            session, str(file_path), "asset-2", "ref-2"
        )
        session.commit()

        with self._patch_scanner_session(db_engine):
            new_level = enrich_asset(
                file_path=str(file_path),
                reference_id=ref.id,
                asset_id=asset.id,
                extract_metadata=True,
                compute_hash=True,
            )

        assert new_level == ENRICHMENT_HASHED

        session.expire_all()
        updated_asset = session.get(Asset, "asset-2")
        assert updated_asset is not None
        assert updated_asset.hash is not None
        assert updated_asset.hash.startswith("blake3:")

    def test_enrichment_updates_both_mime_and_hash(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify both mime_type and hash are set when full enrichment runs."""
        file_path = temp_dir / "model.safetensors"
        file_path.write_bytes(b"\x00" * 50)

        asset, ref = _create_stub_asset(
            session, str(file_path), "asset-3", "ref-3"
        )
        session.commit()

        with self._patch_scanner_session(db_engine):
            enrich_asset(
                file_path=str(file_path),
                reference_id=ref.id,
                asset_id=asset.id,
                extract_metadata=True,
                compute_hash=True,
            )

        session.expire_all()
        updated_asset = session.get(Asset, "asset-3")
        assert updated_asset is not None
        assert updated_asset.mime_type == "application/safetensors"
        assert updated_asset.hash is not None
        assert updated_asset.hash.startswith("blake3:")

    def test_missing_file_returns_stub_level(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify missing files don't cause errors and return STUB level."""
        # The path is never written, so the file does not exist on disk.
        file_path = temp_dir / "nonexistent.bin"

        asset, ref = _create_stub_asset(
            session, str(file_path), "asset-4", "ref-4"
        )
        session.commit()

        with self._patch_scanner_session(db_engine):
            new_level = enrich_asset(
                file_path=str(file_path),
                reference_id=ref.id,
                asset_id=asset.id,
                extract_metadata=True,
                compute_hash=True,
            )

        assert new_level == ENRICHMENT_STUB

        session.expire_all()
        updated_asset = session.get(Asset, "asset-4")
        # Guard first so a missing row fails as an assertion, not an AttributeError.
        assert updated_asset is not None
        assert updated_asset.mime_type is None
        assert updated_asset.hash is None

    def test_duplicate_hash_merges_into_existing_asset(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify duplicate files merge into existing asset instead of failing."""
        file_path_1 = temp_dir / "file1.bin"
        file_path_2 = temp_dir / "file2.bin"
        content = b"identical content"
        file_path_1.write_bytes(content)
        file_path_2.write_bytes(content)

        asset1, ref1 = _create_stub_asset(
            session, str(file_path_1), "asset-dup-1", "ref-dup-1"
        )
        asset2, ref2 = _create_stub_asset(
            session, str(file_path_2), "asset-dup-2", "ref-dup-2"
        )
        session.commit()

        with self._patch_scanner_session(db_engine):
            enrich_asset(
                file_path=str(file_path_1),
                reference_id=ref1.id,
                asset_id=asset1.id,
                extract_metadata=True,
                compute_hash=True,
            )

            enrich_asset(
                file_path=str(file_path_2),
                reference_id=ref2.id,
                asset_id=asset2.id,
                extract_metadata=True,
                compute_hash=True,
            )

        session.expire_all()

        # The asset enriched first keeps its row and now carries the hash.
        updated_asset1 = session.get(Asset, "asset-dup-1")
        assert updated_asset1 is not None
        assert updated_asset1.hash is not None

        # The second stub asset is gone; its reference points at the first asset.
        updated_asset2 = session.get(Asset, "asset-dup-2")
        assert updated_asset2 is None

        updated_ref2 = session.get(AssetReference, "ref-dup-2")
        assert updated_ref2 is not None
        assert updated_ref2.asset_id == "asset-dup-1"

View File

@@ -4,13 +4,13 @@ from pathlib import Path
import pytest
from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetCacheState, AssetInfo, Tag
from app.assets.database.queries import get_asset_tags
from app.assets.database.models import Asset, AssetReference, Tag
from app.assets.database.queries import get_reference_tags
from app.assets.services.ingest import _ingest_file_from_path, _register_existing_asset
class TestIngestFileFromPath:
def test_creates_asset_and_cache_state(self, mock_create_session, temp_dir: Path, session: Session):
def test_creates_asset_and_reference(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "test_file.bin"
file_path.write_bytes(b"test content")
@@ -23,19 +23,19 @@ class TestIngestFileFromPath:
)
assert result.asset_created is True
assert result.state_created is True
assert result.asset_info_id is None # no info_name provided
assert result.ref_created is True
assert result.reference_id is not None
# Verify DB state
assets = session.query(Asset).all()
assert len(assets) == 1
assert assets[0].hash == "blake3:abc123"
states = session.query(AssetCacheState).all()
assert len(states) == 1
assert states[0].file_path == str(file_path)
refs = session.query(AssetReference).all()
assert len(refs) == 1
assert refs[0].file_path == str(file_path)
def test_creates_asset_info_when_name_provided(self, mock_create_session, temp_dir: Path, session: Session):
def test_creates_reference_when_name_provided(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "model.safetensors"
file_path.write_bytes(b"model data")
@@ -50,12 +50,12 @@ class TestIngestFileFromPath:
)
assert result.asset_created is True
assert result.asset_info_id is not None
assert result.reference_id is not None
info = session.query(AssetInfo).first()
assert info is not None
assert info.name == "My Model"
assert info.owner_id == "user1"
ref = session.query(AssetReference).first()
assert ref is not None
assert ref.name == "My Model"
assert ref.owner_id == "user1"
def test_creates_tags_when_provided(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "tagged.bin"
@@ -70,7 +70,7 @@ class TestIngestFileFromPath:
tags=["models", "checkpoints"],
)
assert result.asset_info_id is not None
assert result.reference_id is not None
# Verify tags were created and linked
tags = session.query(Tag).all()
@@ -78,8 +78,8 @@ class TestIngestFileFromPath:
assert "models" in tag_names
assert "checkpoints" in tag_names
asset_tags = get_asset_tags(session, asset_info_id=result.asset_info_id)
assert set(asset_tags) == {"models", "checkpoints"}
ref_tags = get_reference_tags(session, reference_id=result.reference_id)
assert set(ref_tags) == {"models", "checkpoints"}
def test_idempotent_upsert(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "dup.bin"
@@ -102,7 +102,7 @@ class TestIngestFileFromPath:
mtime_ns=1234567890000000001, # different mtime
)
assert r2.asset_created is False
assert r2.state_updated is True or r2.state_created is False
assert r2.ref_updated is True or r2.ref_created is False
# Still only one asset
assets = session.query(Asset).all()
@@ -127,9 +127,9 @@ class TestIngestFileFromPath:
preview_id=preview_id,
)
assert result.asset_info_id is not None
info = session.query(AssetInfo).filter_by(id=result.asset_info_id).first()
assert info.preview_id == preview_id
assert result.reference_id is not None
ref = session.query(AssetReference).filter_by(id=result.reference_id).first()
assert ref.preview_id == preview_id
def test_invalid_preview_id_is_cleared(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "bad_preview.bin"
@@ -144,13 +144,13 @@ class TestIngestFileFromPath:
preview_id="nonexistent-uuid",
)
assert result.asset_info_id is not None
info = session.query(AssetInfo).filter_by(id=result.asset_info_id).first()
assert info.preview_id is None
assert result.reference_id is not None
ref = session.query(AssetReference).filter_by(id=result.reference_id).first()
assert ref.preview_id is None
class TestRegisterExistingAsset:
def test_creates_info_for_existing_asset(self, mock_create_session, session: Session):
def test_creates_reference_for_existing_asset(self, mock_create_session, session: Session):
# Create existing asset
asset = Asset(hash="blake3:existing", size_bytes=1024, mime_type="image/png")
session.add(asset)
@@ -168,42 +168,43 @@ class TestRegisterExistingAsset:
# Verify by re-fetching from DB
session.expire_all()
infos = session.query(AssetInfo).filter_by(name="Registered Asset").all()
assert len(infos) == 1
refs = session.query(AssetReference).filter_by(name="Registered Asset").all()
assert len(refs) == 1
def test_returns_existing_info(self, mock_create_session, session: Session):
# Create asset and info
asset = Asset(hash="blake3:withinfo", size_bytes=512)
def test_creates_new_reference_even_with_same_name(self, mock_create_session, session: Session):
# Create asset and reference
asset = Asset(hash="blake3:withref", size_bytes=512)
session.add(asset)
session.flush()
from app.assets.helpers import get_utc_now
info = AssetInfo(
ref = AssetReference(
owner_id="",
name="Existing Info",
name="Existing Ref",
asset_id=asset.id,
created_at=get_utc_now(),
updated_at=get_utc_now(),
last_access_time=get_utc_now(),
)
session.add(info)
session.flush() # Flush to get the ID
info_id = info.id
session.add(ref)
session.flush()
ref_id = ref.id
session.commit()
result = _register_existing_asset(
asset_hash="blake3:withinfo",
name="Existing Info",
asset_hash="blake3:withref",
name="Existing Ref",
owner_id="",
)
assert result.created is False
# Multiple files with same name are allowed
assert result.created is True
# Verify only one AssetInfo exists for this name
# Verify two AssetReferences exist for this name
session.expire_all()
infos = session.query(AssetInfo).filter_by(name="Existing Info").all()
assert len(infos) == 1
assert infos[0].id == info_id
refs = session.query(AssetReference).filter_by(name="Existing Ref").all()
assert len(refs) == 2
assert ref_id in [r.id for r in refs]
def test_raises_for_nonexistent_hash(self, mock_create_session):
with pytest.raises(ValueError, match="No asset with hash"):
@@ -212,14 +213,14 @@ class TestRegisterExistingAsset:
name="Fail",
)
def test_applies_tags_to_new_info(self, mock_create_session, session: Session):
def test_applies_tags_to_new_reference(self, mock_create_session, session: Session):
asset = Asset(hash="blake3:tagged", size_bytes=256)
session.add(asset)
session.commit()
result = _register_existing_asset(
asset_hash="blake3:tagged",
name="Tagged Info",
name="Tagged Ref",
tags=["alpha", "beta"],
)

View File

@@ -2,8 +2,8 @@
import pytest
from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetInfo
from app.assets.database.queries import ensure_tags_exist, add_tags_to_asset_info
from app.assets.database.models import Asset, AssetReference
from app.assets.database.queries import ensure_tags_exist, add_tags_to_reference
from app.assets.helpers import get_utc_now
from app.assets.services import apply_tags, remove_tags, list_tags
@@ -15,14 +15,14 @@ def _make_asset(session: Session, hash_val: str = "blake3:test") -> Asset:
return asset
def _make_asset_info(
def _make_reference(
session: Session,
asset: Asset,
name: str = "test",
owner_id: str = "",
) -> AssetInfo:
) -> AssetReference:
now = get_utc_now()
info = AssetInfo(
ref = AssetReference(
owner_id=owner_id,
name=name,
asset_id=asset.id,
@@ -30,19 +30,19 @@ def _make_asset_info(
updated_at=now,
last_access_time=now,
)
session.add(info)
session.add(ref)
session.flush()
return info
return ref
class TestApplyTags:
def test_adds_new_tags(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
session.commit()
result = apply_tags(
asset_info_id=info.id,
reference_id=ref.id,
tags=["alpha", "beta"],
)
@@ -52,31 +52,31 @@ class TestApplyTags:
def test_reports_already_present(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
ensure_tags_exist(session, ["existing"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["existing"])
add_tags_to_reference(session, reference_id=ref.id, tags=["existing"])
session.commit()
result = apply_tags(
asset_info_id=info.id,
reference_id=ref.id,
tags=["existing", "new"],
)
assert result.added == ["new"]
assert result.already_present == ["existing"]
def test_raises_for_nonexistent_info(self, mock_create_session):
def test_raises_for_nonexistent_ref(self, mock_create_session):
with pytest.raises(ValueError, match="not found"):
apply_tags(asset_info_id="nonexistent", tags=["x"])
apply_tags(reference_id="nonexistent", tags=["x"])
def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1")
ref = _make_reference(session, asset, owner_id="user1")
session.commit()
with pytest.raises(PermissionError, match="not owner"):
apply_tags(
asset_info_id=info.id,
reference_id=ref.id,
tags=["new"],
owner_id="user2",
)
@@ -85,13 +85,13 @@ class TestApplyTags:
class TestRemoveTags:
def test_removes_tags(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
ensure_tags_exist(session, ["a", "b", "c"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["a", "b", "c"])
add_tags_to_reference(session, reference_id=ref.id, tags=["a", "b", "c"])
session.commit()
result = remove_tags(
asset_info_id=info.id,
reference_id=ref.id,
tags=["a", "b"],
)
@@ -101,31 +101,31 @@ class TestRemoveTags:
def test_reports_not_present(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
ref = _make_reference(session, asset)
ensure_tags_exist(session, ["present"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["present"])
add_tags_to_reference(session, reference_id=ref.id, tags=["present"])
session.commit()
result = remove_tags(
asset_info_id=info.id,
reference_id=ref.id,
tags=["present", "absent"],
)
assert result.removed == ["present"]
assert result.not_present == ["absent"]
def test_raises_for_nonexistent_info(self, mock_create_session):
def test_raises_for_nonexistent_ref(self, mock_create_session):
with pytest.raises(ValueError, match="not found"):
remove_tags(asset_info_id="nonexistent", tags=["x"])
remove_tags(reference_id="nonexistent", tags=["x"])
def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1")
ref = _make_reference(session, asset, owner_id="user1")
session.commit()
with pytest.raises(PermissionError, match="not owner"):
remove_tags(
asset_info_id=info.id,
reference_id=ref.id,
tags=["x"],
owner_id="user2",
)
@@ -135,8 +135,8 @@ class TestListTags:
def test_returns_tags_with_counts(self, mock_create_session, session: Session):
ensure_tags_exist(session, ["used", "unused"])
asset = _make_asset(session)
info = _make_asset_info(session, asset)
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["used"])
ref = _make_reference(session, asset)
add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
session.commit()
rows, total = list_tags()
@@ -149,8 +149,8 @@ class TestListTags:
def test_excludes_zero_counts(self, mock_create_session, session: Session):
ensure_tags_exist(session, ["used", "unused"])
asset = _make_asset(session)
info = _make_asset_info(session, asset)
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["used"])
ref = _make_reference(session, asset)
add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
session.commit()
rows, total = list_tags(include_zero=False)

View File

@@ -24,11 +24,11 @@ def test_create_from_hash_success(
assert b1["created_new"] is False
aid = b1["id"]
# Calling again with the same name should return the same AssetInfo id
# Calling again with the same name creates a new AssetReference (duplicates allowed)
r2 = http.post(f"{api_base}/api/assets/from-hash", json=payload, timeout=120)
b2 = r2.json()
assert r2.status_code == 201, b2
assert b2["id"] == aid
assert b2["id"] != aid # new reference, not the same one
def test_get_and_delete_asset(http: requests.Session, api_base: str, seeded_asset: dict):

View File

@@ -18,25 +18,24 @@ def test_upload_ok_duplicate_reference(http: requests.Session, api_base: str, ma
assert r1.status_code == 201, a1
assert a1["created_new"] is True
# Second upload with the same data and name should return created_new == False and the same asset
# Second upload with the same data and name creates a new AssetReference (duplicates allowed)
# The Asset already exists, so either 200 or 201 is accepted; the reference id must be new
files = {"file": (name, data, "application/octet-stream")}
form = {"tags": json.dumps(tags), "name": name, "user_metadata": json.dumps(meta)}
r2 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
a2 = r2.json()
assert r2.status_code == 200, a2
assert a2["created_new"] is False
assert r2.status_code in (200, 201), a2
assert a2["asset_hash"] == a1["asset_hash"]
assert a2["id"] == a1["id"] # old reference
assert a2["id"] != a1["id"] # new reference with same content
# Third upload with the same data but new name should return created_new == False and the new AssetReference
# Third upload with the same data but a different name also creates a new AssetReference
files = {"file": (name, data, "application/octet-stream")}
form = {"tags": json.dumps(tags), "name": name + "_d", "user_metadata": json.dumps(meta)}
r2 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
a3 = r2.json()
assert r2.status_code == 200, a3
assert a3["created_new"] is False
r3 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
a3 = r3.json()
assert r3.status_code in (200, 201), a3
assert a3["asset_hash"] == a1["asset_hash"]
assert a3["id"] != a1["id"] # old reference
assert a3["id"] != a1["id"]
def test_upload_fastpath_from_existing_hash_no_file(http: requests.Session, api_base: str):