mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-03-09 15:20:06 +00:00
refactor(assets): modular architecture + async two-phase scanner & background seeder (#12621)
This commit is contained in:
committed by
GitHub
parent
a7a6335be5
commit
29b24cb517
@@ -108,7 +108,7 @@ def comfy_url_and_proc(comfy_tmp_base_dir: Path, request: pytest.FixtureRequest)
|
||||
"main.py",
|
||||
f"--base-directory={str(comfy_tmp_base_dir)}",
|
||||
f"--database-url={db_url}",
|
||||
"--disable-assets-autoscan",
|
||||
"--enable-assets",
|
||||
"--listen",
|
||||
"127.0.0.1",
|
||||
"--port",
|
||||
@@ -212,7 +212,7 @@ def asset_factory(http: requests.Session, api_base: str):
|
||||
|
||||
for aid in created:
|
||||
with contextlib.suppress(Exception):
|
||||
http.delete(f"{api_base}/api/assets/{aid}", timeout=30)
|
||||
http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=30)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -258,14 +258,4 @@ def autoclean_unit_test_assets(http: requests.Session, api_base: str):
|
||||
break
|
||||
for aid in ids:
|
||||
with contextlib.suppress(Exception):
|
||||
http.delete(f"{api_base}/api/assets/{aid}", timeout=30)
|
||||
|
||||
|
||||
def trigger_sync_seed_assets(session: requests.Session, base_url: str) -> None:
|
||||
"""Force a fast sync/seed pass by calling the seed endpoint."""
|
||||
session.post(base_url + "/api/assets/seed", json={"roots": ["models", "input", "output"]}, timeout=30)
|
||||
time.sleep(0.2)
|
||||
|
||||
|
||||
def get_asset_filename(asset_hash: str, extension: str) -> str:
|
||||
return asset_hash.removeprefix("blake3:") + extension
|
||||
http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=30)
|
||||
|
||||
28
tests-unit/assets_test/helpers.py
Normal file
28
tests-unit/assets_test/helpers.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""Helper functions for assets integration tests."""
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def trigger_sync_seed_assets(session: requests.Session, base_url: str) -> None:
|
||||
"""Force a synchronous sync/seed pass by calling the seed endpoint with wait=true.
|
||||
|
||||
Retries on 409 (already running) until the previous scan finishes.
|
||||
"""
|
||||
deadline = time.monotonic() + 60
|
||||
while True:
|
||||
r = session.post(
|
||||
base_url + "/api/assets/seed?wait=true",
|
||||
json={"roots": ["models", "input", "output"]},
|
||||
timeout=60,
|
||||
)
|
||||
if r.status_code != 409:
|
||||
assert r.status_code == 200, f"seed endpoint returned {r.status_code}: {r.text}"
|
||||
return
|
||||
if time.monotonic() > deadline:
|
||||
raise TimeoutError("seed endpoint stuck in 409 (already running)")
|
||||
time.sleep(0.25)
|
||||
|
||||
|
||||
def get_asset_filename(asset_hash: str, extension: str) -> str:
|
||||
return asset_hash.removeprefix("blake3:") + extension
|
||||
20
tests-unit/assets_test/queries/conftest.py
Normal file
20
tests-unit/assets_test/queries/conftest.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import pytest
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.models import Base
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def session():
|
||||
"""In-memory SQLite session for fast unit tests."""
|
||||
engine = create_engine("sqlite:///:memory:")
|
||||
Base.metadata.create_all(engine)
|
||||
with Session(engine) as sess:
|
||||
yield sess
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def autoclean_unit_test_assets():
|
||||
"""Override parent autouse fixture - query tests don't need server cleanup."""
|
||||
yield
|
||||
144
tests-unit/assets_test/queries/test_asset.py
Normal file
144
tests-unit/assets_test/queries/test_asset.py
Normal file
@@ -0,0 +1,144 @@
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.helpers import get_utc_now
|
||||
from app.assets.database.models import Asset
|
||||
from app.assets.database.queries import (
|
||||
asset_exists_by_hash,
|
||||
get_asset_by_hash,
|
||||
upsert_asset,
|
||||
bulk_insert_assets,
|
||||
)
|
||||
|
||||
|
||||
class TestAssetExistsByHash:
|
||||
@pytest.mark.parametrize(
|
||||
"setup_hash,query_hash,expected",
|
||||
[
|
||||
(None, "nonexistent", False), # No asset exists
|
||||
("blake3:abc123", "blake3:abc123", True), # Asset exists with matching hash
|
||||
(None, "", False), # Null hash in DB doesn't match empty string
|
||||
],
|
||||
ids=["nonexistent", "existing", "null_hash_no_match"],
|
||||
)
|
||||
def test_exists_by_hash(self, session: Session, setup_hash, query_hash, expected):
|
||||
if setup_hash is not None or query_hash == "":
|
||||
asset = Asset(hash=setup_hash, size_bytes=100)
|
||||
session.add(asset)
|
||||
session.commit()
|
||||
|
||||
assert asset_exists_by_hash(session, asset_hash=query_hash) is expected
|
||||
|
||||
|
||||
class TestGetAssetByHash:
|
||||
@pytest.mark.parametrize(
|
||||
"setup_hash,query_hash,should_find",
|
||||
[
|
||||
(None, "nonexistent", False),
|
||||
("blake3:def456", "blake3:def456", True),
|
||||
],
|
||||
ids=["nonexistent", "existing"],
|
||||
)
|
||||
def test_get_by_hash(self, session: Session, setup_hash, query_hash, should_find):
|
||||
if setup_hash is not None:
|
||||
asset = Asset(hash=setup_hash, size_bytes=200, mime_type="image/png")
|
||||
session.add(asset)
|
||||
session.commit()
|
||||
|
||||
result = get_asset_by_hash(session, asset_hash=query_hash)
|
||||
if should_find:
|
||||
assert result is not None
|
||||
assert result.size_bytes == 200
|
||||
assert result.mime_type == "image/png"
|
||||
else:
|
||||
assert result is None
|
||||
|
||||
|
||||
class TestUpsertAsset:
|
||||
@pytest.mark.parametrize(
|
||||
"first_size,first_mime,second_size,second_mime,expect_created,expect_updated,final_size,final_mime",
|
||||
[
|
||||
# New asset creation
|
||||
(None, None, 1024, "application/octet-stream", True, False, 1024, "application/octet-stream"),
|
||||
# Existing asset, same values - no update
|
||||
(500, "text/plain", 500, "text/plain", False, False, 500, "text/plain"),
|
||||
# Existing asset with size 0, update with new values
|
||||
(0, None, 2048, "image/png", False, True, 2048, "image/png"),
|
||||
# Existing asset, second call with size 0 - no update
|
||||
(1000, None, 0, None, False, False, 1000, None),
|
||||
],
|
||||
ids=["new_asset", "existing_no_change", "update_from_zero", "zero_size_no_update"],
|
||||
)
|
||||
def test_upsert_scenarios(
|
||||
self,
|
||||
session: Session,
|
||||
first_size,
|
||||
first_mime,
|
||||
second_size,
|
||||
second_mime,
|
||||
expect_created,
|
||||
expect_updated,
|
||||
final_size,
|
||||
final_mime,
|
||||
):
|
||||
asset_hash = f"blake3:test_{first_size}_{second_size}"
|
||||
|
||||
# First upsert (if first_size is not None, we're testing the second call)
|
||||
if first_size is not None:
|
||||
upsert_asset(
|
||||
session,
|
||||
asset_hash=asset_hash,
|
||||
size_bytes=first_size,
|
||||
mime_type=first_mime,
|
||||
)
|
||||
session.commit()
|
||||
|
||||
# The upsert call we're testing
|
||||
asset, created, updated = upsert_asset(
|
||||
session,
|
||||
asset_hash=asset_hash,
|
||||
size_bytes=second_size,
|
||||
mime_type=second_mime,
|
||||
)
|
||||
session.commit()
|
||||
|
||||
assert created is expect_created
|
||||
assert updated is expect_updated
|
||||
assert asset.size_bytes == final_size
|
||||
assert asset.mime_type == final_mime
|
||||
|
||||
|
||||
class TestBulkInsertAssets:
|
||||
def test_inserts_multiple_assets(self, session: Session):
|
||||
now = get_utc_now()
|
||||
rows = [
|
||||
{"id": str(uuid.uuid4()), "hash": "blake3:bulk1", "size_bytes": 100, "mime_type": "text/plain", "created_at": now},
|
||||
{"id": str(uuid.uuid4()), "hash": "blake3:bulk2", "size_bytes": 200, "mime_type": "image/png", "created_at": now},
|
||||
{"id": str(uuid.uuid4()), "hash": "blake3:bulk3", "size_bytes": 300, "mime_type": None, "created_at": now},
|
||||
]
|
||||
bulk_insert_assets(session, rows)
|
||||
session.commit()
|
||||
|
||||
assets = session.query(Asset).all()
|
||||
assert len(assets) == 3
|
||||
hashes = {a.hash for a in assets}
|
||||
assert hashes == {"blake3:bulk1", "blake3:bulk2", "blake3:bulk3"}
|
||||
|
||||
def test_empty_list_is_noop(self, session: Session):
|
||||
bulk_insert_assets(session, [])
|
||||
session.commit()
|
||||
assert session.query(Asset).count() == 0
|
||||
|
||||
def test_handles_large_batch(self, session: Session):
|
||||
"""Test chunking logic with more rows than MAX_BIND_PARAMS allows."""
|
||||
now = get_utc_now()
|
||||
rows = [
|
||||
{"id": str(uuid.uuid4()), "hash": f"blake3:large{i}", "size_bytes": i, "mime_type": None, "created_at": now}
|
||||
for i in range(200)
|
||||
]
|
||||
bulk_insert_assets(session, rows)
|
||||
session.commit()
|
||||
|
||||
assert session.query(Asset).count() == 200
|
||||
517
tests-unit/assets_test/queries/test_asset_info.py
Normal file
517
tests-unit/assets_test/queries/test_asset_info.py
Normal file
@@ -0,0 +1,517 @@
|
||||
import time
|
||||
import uuid
|
||||
import pytest
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.models import Asset, AssetReference, AssetReferenceMeta
|
||||
from app.assets.database.queries import (
|
||||
reference_exists_for_asset_id,
|
||||
get_reference_by_id,
|
||||
insert_reference,
|
||||
get_or_create_reference,
|
||||
update_reference_timestamps,
|
||||
list_references_page,
|
||||
fetch_reference_asset_and_tags,
|
||||
fetch_reference_and_asset,
|
||||
update_reference_access_time,
|
||||
set_reference_metadata,
|
||||
delete_reference_by_id,
|
||||
set_reference_preview,
|
||||
bulk_insert_references_ignore_conflicts,
|
||||
get_reference_ids_by_ids,
|
||||
ensure_tags_exist,
|
||||
add_tags_to_reference,
|
||||
)
|
||||
from app.assets.helpers import get_utc_now
|
||||
|
||||
|
||||
def _make_asset(session: Session, hash_val: str | None = None, size: int = 1024) -> Asset:
|
||||
asset = Asset(hash=hash_val, size_bytes=size, mime_type="application/octet-stream")
|
||||
session.add(asset)
|
||||
session.flush()
|
||||
return asset
|
||||
|
||||
|
||||
def _make_reference(
|
||||
session: Session,
|
||||
asset: Asset,
|
||||
name: str = "test",
|
||||
owner_id: str = "",
|
||||
) -> AssetReference:
|
||||
now = get_utc_now()
|
||||
ref = AssetReference(
|
||||
owner_id=owner_id,
|
||||
name=name,
|
||||
asset_id=asset.id,
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
last_access_time=now,
|
||||
)
|
||||
session.add(ref)
|
||||
session.flush()
|
||||
return ref
|
||||
|
||||
|
||||
class TestReferenceExistsForAssetId:
|
||||
def test_returns_false_when_no_reference(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
assert reference_exists_for_asset_id(session, asset_id=asset.id) is False
|
||||
|
||||
def test_returns_true_when_reference_exists(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
_make_reference(session, asset)
|
||||
assert reference_exists_for_asset_id(session, asset_id=asset.id) is True
|
||||
|
||||
|
||||
class TestGetReferenceById:
|
||||
def test_returns_none_for_nonexistent(self, session: Session):
|
||||
assert get_reference_by_id(session, reference_id="nonexistent") is None
|
||||
|
||||
def test_returns_reference(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref = _make_reference(session, asset, name="myfile.txt")
|
||||
|
||||
result = get_reference_by_id(session, reference_id=ref.id)
|
||||
assert result is not None
|
||||
assert result.name == "myfile.txt"
|
||||
|
||||
|
||||
class TestListReferencesPage:
|
||||
def test_empty_db(self, session: Session):
|
||||
refs, tag_map, total = list_references_page(session)
|
||||
assert refs == []
|
||||
assert tag_map == {}
|
||||
assert total == 0
|
||||
|
||||
def test_returns_references_with_tags(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref = _make_reference(session, asset, name="test.bin")
|
||||
ensure_tags_exist(session, ["alpha", "beta"])
|
||||
add_tags_to_reference(session, reference_id=ref.id, tags=["alpha", "beta"])
|
||||
session.commit()
|
||||
|
||||
refs, tag_map, total = list_references_page(session)
|
||||
assert len(refs) == 1
|
||||
assert refs[0].id == ref.id
|
||||
assert set(tag_map[ref.id]) == {"alpha", "beta"}
|
||||
assert total == 1
|
||||
|
||||
def test_name_contains_filter(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
_make_reference(session, asset, name="model_v1.safetensors")
|
||||
_make_reference(session, asset, name="config.json")
|
||||
session.commit()
|
||||
|
||||
refs, _, total = list_references_page(session, name_contains="model")
|
||||
assert total == 1
|
||||
assert refs[0].name == "model_v1.safetensors"
|
||||
|
||||
def test_owner_visibility(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
_make_reference(session, asset, name="public", owner_id="")
|
||||
_make_reference(session, asset, name="private", owner_id="user1")
|
||||
session.commit()
|
||||
|
||||
# Empty owner sees only public
|
||||
refs, _, total = list_references_page(session, owner_id="")
|
||||
assert total == 1
|
||||
assert refs[0].name == "public"
|
||||
|
||||
# Owner sees both
|
||||
refs, _, total = list_references_page(session, owner_id="user1")
|
||||
assert total == 2
|
||||
|
||||
def test_include_tags_filter(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref1 = _make_reference(session, asset, name="tagged")
|
||||
_make_reference(session, asset, name="untagged")
|
||||
ensure_tags_exist(session, ["wanted"])
|
||||
add_tags_to_reference(session, reference_id=ref1.id, tags=["wanted"])
|
||||
session.commit()
|
||||
|
||||
refs, _, total = list_references_page(session, include_tags=["wanted"])
|
||||
assert total == 1
|
||||
assert refs[0].name == "tagged"
|
||||
|
||||
def test_exclude_tags_filter(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
_make_reference(session, asset, name="keep")
|
||||
ref_exclude = _make_reference(session, asset, name="exclude")
|
||||
ensure_tags_exist(session, ["bad"])
|
||||
add_tags_to_reference(session, reference_id=ref_exclude.id, tags=["bad"])
|
||||
session.commit()
|
||||
|
||||
refs, _, total = list_references_page(session, exclude_tags=["bad"])
|
||||
assert total == 1
|
||||
assert refs[0].name == "keep"
|
||||
|
||||
def test_sorting(self, session: Session):
|
||||
asset = _make_asset(session, "hash1", size=100)
|
||||
asset2 = _make_asset(session, "hash2", size=500)
|
||||
_make_reference(session, asset, name="small")
|
||||
_make_reference(session, asset2, name="large")
|
||||
session.commit()
|
||||
|
||||
refs, _, _ = list_references_page(session, sort="size", order="desc")
|
||||
assert refs[0].name == "large"
|
||||
|
||||
refs, _, _ = list_references_page(session, sort="name", order="asc")
|
||||
assert refs[0].name == "large"
|
||||
|
||||
|
||||
class TestFetchReferenceAssetAndTags:
|
||||
def test_returns_none_for_nonexistent(self, session: Session):
|
||||
result = fetch_reference_asset_and_tags(session, "nonexistent")
|
||||
assert result is None
|
||||
|
||||
def test_returns_tuple(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref = _make_reference(session, asset, name="test.bin")
|
||||
ensure_tags_exist(session, ["tag1"])
|
||||
add_tags_to_reference(session, reference_id=ref.id, tags=["tag1"])
|
||||
session.commit()
|
||||
|
||||
result = fetch_reference_asset_and_tags(session, ref.id)
|
||||
assert result is not None
|
||||
ret_ref, ret_asset, ret_tags = result
|
||||
assert ret_ref.id == ref.id
|
||||
assert ret_asset.id == asset.id
|
||||
assert ret_tags == ["tag1"]
|
||||
|
||||
|
||||
class TestFetchReferenceAndAsset:
|
||||
def test_returns_none_for_nonexistent(self, session: Session):
|
||||
result = fetch_reference_and_asset(session, reference_id="nonexistent")
|
||||
assert result is None
|
||||
|
||||
def test_returns_tuple(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref = _make_reference(session, asset)
|
||||
session.commit()
|
||||
|
||||
result = fetch_reference_and_asset(session, reference_id=ref.id)
|
||||
assert result is not None
|
||||
ret_ref, ret_asset = result
|
||||
assert ret_ref.id == ref.id
|
||||
assert ret_asset.id == asset.id
|
||||
|
||||
|
||||
class TestUpdateReferenceAccessTime:
|
||||
def test_updates_last_access_time(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref = _make_reference(session, asset)
|
||||
original_time = ref.last_access_time
|
||||
session.commit()
|
||||
|
||||
import time
|
||||
time.sleep(0.01)
|
||||
|
||||
update_reference_access_time(session, reference_id=ref.id)
|
||||
session.commit()
|
||||
|
||||
session.refresh(ref)
|
||||
assert ref.last_access_time > original_time
|
||||
|
||||
|
||||
class TestDeleteReferenceById:
|
||||
def test_deletes_existing(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref = _make_reference(session, asset)
|
||||
session.commit()
|
||||
|
||||
result = delete_reference_by_id(session, reference_id=ref.id, owner_id="")
|
||||
assert result is True
|
||||
assert get_reference_by_id(session, reference_id=ref.id) is None
|
||||
|
||||
def test_returns_false_for_nonexistent(self, session: Session):
|
||||
result = delete_reference_by_id(session, reference_id="nonexistent", owner_id="")
|
||||
assert result is False
|
||||
|
||||
def test_respects_owner_visibility(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref = _make_reference(session, asset, owner_id="user1")
|
||||
session.commit()
|
||||
|
||||
result = delete_reference_by_id(session, reference_id=ref.id, owner_id="user2")
|
||||
assert result is False
|
||||
assert get_reference_by_id(session, reference_id=ref.id) is not None
|
||||
|
||||
|
||||
class TestSetReferencePreview:
|
||||
def test_sets_preview(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
preview_asset = _make_asset(session, "preview_hash")
|
||||
ref = _make_reference(session, asset)
|
||||
session.commit()
|
||||
|
||||
set_reference_preview(session, reference_id=ref.id, preview_asset_id=preview_asset.id)
|
||||
session.commit()
|
||||
|
||||
session.refresh(ref)
|
||||
assert ref.preview_id == preview_asset.id
|
||||
|
||||
def test_clears_preview(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
preview_asset = _make_asset(session, "preview_hash")
|
||||
ref = _make_reference(session, asset)
|
||||
ref.preview_id = preview_asset.id
|
||||
session.commit()
|
||||
|
||||
set_reference_preview(session, reference_id=ref.id, preview_asset_id=None)
|
||||
session.commit()
|
||||
|
||||
session.refresh(ref)
|
||||
assert ref.preview_id is None
|
||||
|
||||
def test_raises_for_nonexistent_reference(self, session: Session):
|
||||
with pytest.raises(ValueError, match="not found"):
|
||||
set_reference_preview(session, reference_id="nonexistent", preview_asset_id=None)
|
||||
|
||||
def test_raises_for_nonexistent_preview(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref = _make_reference(session, asset)
|
||||
session.commit()
|
||||
|
||||
with pytest.raises(ValueError, match="Preview Asset"):
|
||||
set_reference_preview(session, reference_id=ref.id, preview_asset_id="nonexistent")
|
||||
|
||||
|
||||
class TestInsertReference:
|
||||
def test_creates_new_reference(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref = insert_reference(
|
||||
session, asset_id=asset.id, owner_id="user1", name="test.bin"
|
||||
)
|
||||
session.commit()
|
||||
|
||||
assert ref is not None
|
||||
assert ref.name == "test.bin"
|
||||
assert ref.owner_id == "user1"
|
||||
|
||||
def test_allows_duplicate_names(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref1 = insert_reference(session, asset_id=asset.id, owner_id="user1", name="dup.bin")
|
||||
session.commit()
|
||||
|
||||
# Duplicate names are now allowed
|
||||
ref2 = insert_reference(
|
||||
session, asset_id=asset.id, owner_id="user1", name="dup.bin"
|
||||
)
|
||||
session.commit()
|
||||
|
||||
assert ref1 is not None
|
||||
assert ref2 is not None
|
||||
assert ref1.id != ref2.id
|
||||
|
||||
|
||||
class TestGetOrCreateReference:
|
||||
def test_creates_new_reference(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref, created = get_or_create_reference(
|
||||
session, asset_id=asset.id, owner_id="user1", name="new.bin"
|
||||
)
|
||||
session.commit()
|
||||
|
||||
assert created is True
|
||||
assert ref.name == "new.bin"
|
||||
|
||||
def test_always_creates_new_reference(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref1, created1 = get_or_create_reference(
|
||||
session, asset_id=asset.id, owner_id="user1", name="existing.bin"
|
||||
)
|
||||
session.commit()
|
||||
|
||||
# Duplicate names are allowed, so always creates new
|
||||
ref2, created2 = get_or_create_reference(
|
||||
session, asset_id=asset.id, owner_id="user1", name="existing.bin"
|
||||
)
|
||||
session.commit()
|
||||
|
||||
assert created1 is True
|
||||
assert created2 is True
|
||||
assert ref1.id != ref2.id
|
||||
|
||||
|
||||
class TestUpdateReferenceTimestamps:
|
||||
def test_updates_timestamps(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref = _make_reference(session, asset)
|
||||
original_updated_at = ref.updated_at
|
||||
session.commit()
|
||||
|
||||
time.sleep(0.01)
|
||||
update_reference_timestamps(session, ref)
|
||||
session.commit()
|
||||
|
||||
session.refresh(ref)
|
||||
assert ref.updated_at > original_updated_at
|
||||
|
||||
def test_updates_preview_id(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
preview_asset = _make_asset(session, "preview_hash")
|
||||
ref = _make_reference(session, asset)
|
||||
session.commit()
|
||||
|
||||
update_reference_timestamps(session, ref, preview_id=preview_asset.id)
|
||||
session.commit()
|
||||
|
||||
session.refresh(ref)
|
||||
assert ref.preview_id == preview_asset.id
|
||||
|
||||
|
||||
class TestSetReferenceMetadata:
|
||||
def test_sets_metadata(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref = _make_reference(session, asset)
|
||||
session.commit()
|
||||
|
||||
set_reference_metadata(
|
||||
session, reference_id=ref.id, user_metadata={"key": "value"}
|
||||
)
|
||||
session.commit()
|
||||
|
||||
session.refresh(ref)
|
||||
assert ref.user_metadata == {"key": "value"}
|
||||
# Check metadata table
|
||||
meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
|
||||
assert len(meta) == 1
|
||||
assert meta[0].key == "key"
|
||||
assert meta[0].val_str == "value"
|
||||
|
||||
def test_replaces_existing_metadata(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref = _make_reference(session, asset)
|
||||
session.commit()
|
||||
|
||||
set_reference_metadata(
|
||||
session, reference_id=ref.id, user_metadata={"old": "data"}
|
||||
)
|
||||
session.commit()
|
||||
|
||||
set_reference_metadata(
|
||||
session, reference_id=ref.id, user_metadata={"new": "data"}
|
||||
)
|
||||
session.commit()
|
||||
|
||||
meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
|
||||
assert len(meta) == 1
|
||||
assert meta[0].key == "new"
|
||||
|
||||
def test_clears_metadata_with_empty_dict(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref = _make_reference(session, asset)
|
||||
session.commit()
|
||||
|
||||
set_reference_metadata(
|
||||
session, reference_id=ref.id, user_metadata={"key": "value"}
|
||||
)
|
||||
session.commit()
|
||||
|
||||
set_reference_metadata(
|
||||
session, reference_id=ref.id, user_metadata={}
|
||||
)
|
||||
session.commit()
|
||||
|
||||
session.refresh(ref)
|
||||
assert ref.user_metadata == {}
|
||||
meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
|
||||
assert len(meta) == 0
|
||||
|
||||
def test_raises_for_nonexistent(self, session: Session):
|
||||
with pytest.raises(ValueError, match="not found"):
|
||||
set_reference_metadata(
|
||||
session, reference_id="nonexistent", user_metadata={"key": "value"}
|
||||
)
|
||||
|
||||
|
||||
class TestBulkInsertReferencesIgnoreConflicts:
|
||||
def test_inserts_multiple_references(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
now = get_utc_now()
|
||||
rows = [
|
||||
{
|
||||
"id": str(uuid.uuid4()),
|
||||
"owner_id": "",
|
||||
"name": "bulk1.bin",
|
||||
"asset_id": asset.id,
|
||||
"preview_id": None,
|
||||
"user_metadata": {},
|
||||
"created_at": now,
|
||||
"updated_at": now,
|
||||
"last_access_time": now,
|
||||
},
|
||||
{
|
||||
"id": str(uuid.uuid4()),
|
||||
"owner_id": "",
|
||||
"name": "bulk2.bin",
|
||||
"asset_id": asset.id,
|
||||
"preview_id": None,
|
||||
"user_metadata": {},
|
||||
"created_at": now,
|
||||
"updated_at": now,
|
||||
"last_access_time": now,
|
||||
},
|
||||
]
|
||||
bulk_insert_references_ignore_conflicts(session, rows)
|
||||
session.commit()
|
||||
|
||||
refs = session.query(AssetReference).all()
|
||||
assert len(refs) == 2
|
||||
|
||||
def test_allows_duplicate_names(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
_make_reference(session, asset, name="existing.bin", owner_id="")
|
||||
session.commit()
|
||||
|
||||
now = get_utc_now()
|
||||
rows = [
|
||||
{
|
||||
"id": str(uuid.uuid4()),
|
||||
"owner_id": "",
|
||||
"name": "existing.bin",
|
||||
"asset_id": asset.id,
|
||||
"preview_id": None,
|
||||
"user_metadata": {},
|
||||
"created_at": now,
|
||||
"updated_at": now,
|
||||
"last_access_time": now,
|
||||
},
|
||||
{
|
||||
"id": str(uuid.uuid4()),
|
||||
"owner_id": "",
|
||||
"name": "new.bin",
|
||||
"asset_id": asset.id,
|
||||
"preview_id": None,
|
||||
"user_metadata": {},
|
||||
"created_at": now,
|
||||
"updated_at": now,
|
||||
"last_access_time": now,
|
||||
},
|
||||
]
|
||||
bulk_insert_references_ignore_conflicts(session, rows)
|
||||
session.commit()
|
||||
|
||||
# Duplicate names allowed, so all 3 rows exist
|
||||
refs = session.query(AssetReference).all()
|
||||
assert len(refs) == 3
|
||||
|
||||
def test_empty_list_is_noop(self, session: Session):
|
||||
bulk_insert_references_ignore_conflicts(session, [])
|
||||
assert session.query(AssetReference).count() == 0
|
||||
|
||||
|
||||
class TestGetReferenceIdsByIds:
|
||||
def test_returns_existing_ids(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref1 = _make_reference(session, asset, name="a.bin")
|
||||
ref2 = _make_reference(session, asset, name="b.bin")
|
||||
session.commit()
|
||||
|
||||
found = get_reference_ids_by_ids(session, [ref1.id, ref2.id, "nonexistent"])
|
||||
|
||||
assert found == {ref1.id, ref2.id}
|
||||
|
||||
def test_empty_list_returns_empty(self, session: Session):
|
||||
found = get_reference_ids_by_ids(session, [])
|
||||
assert found == set()
|
||||
499
tests-unit/assets_test/queries/test_cache_state.py
Normal file
499
tests-unit/assets_test/queries/test_cache_state.py
Normal file
@@ -0,0 +1,499 @@
|
||||
"""Tests for cache_state (AssetReference file path) query functions."""
|
||||
import pytest
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.models import Asset, AssetReference
|
||||
from app.assets.database.queries import (
|
||||
list_references_by_asset_id,
|
||||
upsert_reference,
|
||||
get_unreferenced_unhashed_asset_ids,
|
||||
delete_assets_by_ids,
|
||||
get_references_for_prefixes,
|
||||
bulk_update_needs_verify,
|
||||
delete_references_by_ids,
|
||||
delete_orphaned_seed_asset,
|
||||
bulk_insert_references_ignore_conflicts,
|
||||
get_references_by_paths_and_asset_ids,
|
||||
mark_references_missing_outside_prefixes,
|
||||
restore_references_by_paths,
|
||||
)
|
||||
from app.assets.helpers import select_best_live_path, get_utc_now
|
||||
|
||||
|
||||
def _make_asset(session: Session, hash_val: str | None = None, size: int = 1024) -> Asset:
|
||||
asset = Asset(hash=hash_val, size_bytes=size)
|
||||
session.add(asset)
|
||||
session.flush()
|
||||
return asset
|
||||
|
||||
|
||||
def _make_reference(
|
||||
session: Session,
|
||||
asset: Asset,
|
||||
file_path: str,
|
||||
name: str = "test",
|
||||
mtime_ns: int | None = None,
|
||||
needs_verify: bool = False,
|
||||
) -> AssetReference:
|
||||
now = get_utc_now()
|
||||
ref = AssetReference(
|
||||
asset_id=asset.id,
|
||||
file_path=file_path,
|
||||
name=name,
|
||||
mtime_ns=mtime_ns,
|
||||
needs_verify=needs_verify,
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
last_access_time=now,
|
||||
)
|
||||
session.add(ref)
|
||||
session.flush()
|
||||
return ref
|
||||
|
||||
|
||||
class TestListReferencesByAssetId:
|
||||
def test_returns_empty_for_no_references(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
refs = list_references_by_asset_id(session, asset_id=asset.id)
|
||||
assert list(refs) == []
|
||||
|
||||
def test_returns_references_for_asset(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
_make_reference(session, asset, "/path/a.bin", name="a")
|
||||
_make_reference(session, asset, "/path/b.bin", name="b")
|
||||
session.commit()
|
||||
|
||||
refs = list_references_by_asset_id(session, asset_id=asset.id)
|
||||
paths = [r.file_path for r in refs]
|
||||
assert set(paths) == {"/path/a.bin", "/path/b.bin"}
|
||||
|
||||
def test_does_not_return_other_assets_references(self, session: Session):
|
||||
asset1 = _make_asset(session, "hash1")
|
||||
asset2 = _make_asset(session, "hash2")
|
||||
_make_reference(session, asset1, "/path/asset1.bin", name="a1")
|
||||
_make_reference(session, asset2, "/path/asset2.bin", name="a2")
|
||||
session.commit()
|
||||
|
||||
refs = list_references_by_asset_id(session, asset_id=asset1.id)
|
||||
paths = [r.file_path for r in refs]
|
||||
assert paths == ["/path/asset1.bin"]
|
||||
|
||||
|
||||
class TestSelectBestLivePath:
|
||||
def test_returns_empty_for_empty_list(self):
|
||||
result = select_best_live_path([])
|
||||
assert result == ""
|
||||
|
||||
def test_returns_empty_when_no_files_exist(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
ref = _make_reference(session, asset, "/nonexistent/path.bin")
|
||||
session.commit()
|
||||
|
||||
result = select_best_live_path([ref])
|
||||
assert result == ""
|
||||
|
||||
def test_prefers_verified_path(self, session: Session, tmp_path):
|
||||
"""needs_verify=False should be preferred."""
|
||||
asset = _make_asset(session, "hash1")
|
||||
|
||||
verified_file = tmp_path / "verified.bin"
|
||||
verified_file.write_bytes(b"data")
|
||||
|
||||
unverified_file = tmp_path / "unverified.bin"
|
||||
unverified_file.write_bytes(b"data")
|
||||
|
||||
ref_verified = _make_reference(
|
||||
session, asset, str(verified_file), name="verified", needs_verify=False
|
||||
)
|
||||
ref_unverified = _make_reference(
|
||||
session, asset, str(unverified_file), name="unverified", needs_verify=True
|
||||
)
|
||||
session.commit()
|
||||
|
||||
refs = [ref_unverified, ref_verified]
|
||||
result = select_best_live_path(refs)
|
||||
assert result == str(verified_file)
|
||||
|
||||
def test_falls_back_to_existing_unverified(self, session: Session, tmp_path):
|
||||
"""If all references need verification, return first existing path."""
|
||||
asset = _make_asset(session, "hash1")
|
||||
|
||||
existing_file = tmp_path / "exists.bin"
|
||||
existing_file.write_bytes(b"data")
|
||||
|
||||
ref = _make_reference(session, asset, str(existing_file), needs_verify=True)
|
||||
session.commit()
|
||||
|
||||
result = select_best_live_path([ref])
|
||||
assert result == str(existing_file)
|
||||
|
||||
|
||||
class TestSelectBestLivePathWithMocking:
    def test_handles_missing_file_path_attr(self):
        """Gracefully handle references with None file_path."""

        # Minimal stand-in object: only the two attributes the selector reads.
        class StubRef:
            file_path = None
            needs_verify = False

        assert select_best_live_path([StubRef()]) == ""
|
||||
|
||||
|
||||
class TestUpsertReference:
    """upsert_reference create/update semantics, driven by a scenario table."""

    @pytest.mark.parametrize(
        "initial_mtime,second_mtime,expect_created,expect_updated,final_mtime",
        [
            # New reference creation
            (None, 12345, True, False, 12345),
            # Existing reference, same mtime - no update
            (100, 100, False, False, 100),
            # Existing reference, different mtime - update
            (100, 200, False, True, 200),
        ],
        ids=["new_reference", "existing_no_change", "existing_update_mtime"],
    )
    def test_upsert_scenarios(
        self, session: Session, initial_mtime, second_mtime, expect_created, expect_updated, final_mtime
    ):
        owner = _make_asset(session, "hash1")
        # Unique path/name per parametrization so scenarios never collide.
        target_path = f"/path_{initial_mtime}_{second_mtime}.bin"
        target_name = f"file_{initial_mtime}_{second_mtime}"

        if initial_mtime is not None:
            # Seed the pre-existing reference for the update scenarios.
            upsert_reference(session, asset_id=owner.id, file_path=target_path, name=target_name, mtime_ns=initial_mtime)
            session.commit()

        # The upsert call under test.
        created, updated = upsert_reference(
            session, asset_id=owner.id, file_path=target_path, name=target_name, mtime_ns=second_mtime
        )
        session.commit()

        assert created is expect_created
        assert updated is expect_updated
        row = session.query(AssetReference).filter_by(file_path=target_path).one()
        assert row.mtime_ns == final_mtime

    def test_upsert_restores_missing_reference(self, session: Session):
        """Upserting a reference that was marked missing should restore it."""
        owner = _make_asset(session, "hash1")
        target_path = "/restored/file.bin"

        stale = _make_reference(session, owner, target_path, mtime_ns=100)
        stale.is_missing = True
        session.commit()

        created, updated = upsert_reference(
            session, asset_id=owner.id, file_path=target_path, name="restored", mtime_ns=100
        )
        session.commit()

        assert created is False
        assert updated is True
        row = session.query(AssetReference).filter_by(file_path=target_path).one()
        assert row.is_missing is False
|
||||
|
||||
|
||||
class TestRestoreReferencesByPaths:
    def test_restores_missing_references(self, session: Session):
        owner = _make_asset(session, "hash1")
        missing_path = "/missing/file.bin"
        active_path = "/active/file.bin"

        gone = _make_reference(session, owner, missing_path, name="missing")
        gone.is_missing = True
        _make_reference(session, owner, active_path, name="active")
        session.commit()

        count = restore_references_by_paths(session, [missing_path])
        session.commit()
        assert count == 1

        row = session.query(AssetReference).filter_by(file_path=missing_path).one()
        assert row.is_missing is False

    def test_empty_list_restores_nothing(self, session: Session):
        # An empty path list is a no-op, not an error.
        assert restore_references_by_paths(session, []) == 0
|
||||
|
||||
|
||||
class TestMarkReferencesMissingOutsidePrefixes:
    def test_marks_references_missing_outside_prefixes(self, session: Session, tmp_path):
        owner = _make_asset(session, "hash1")
        valid_dir = tmp_path / "valid"
        invalid_dir = tmp_path / "invalid"
        for directory in (valid_dir, invalid_dir):
            directory.mkdir()

        valid_path = str(valid_dir / "file.bin")
        invalid_path = str(invalid_dir / "file.bin")
        _make_reference(session, owner, valid_path, name="valid")
        _make_reference(session, owner, invalid_path, name="invalid")
        session.commit()

        flagged = mark_references_missing_outside_prefixes(session, [str(valid_dir)])
        session.commit()
        assert flagged == 1

        # Both rows survive; only the out-of-prefix one is flagged missing.
        by_path = {r.file_path: r for r in session.query(AssetReference).all()}
        assert len(by_path) == 2
        assert by_path[valid_path].is_missing is False
        assert by_path[invalid_path].is_missing is True

    def test_empty_prefixes_marks_nothing(self, session: Session):
        owner = _make_asset(session, "hash1")
        _make_reference(session, owner, "/some/path.bin")
        session.commit()

        assert mark_references_missing_outside_prefixes(session, []) == 0
|
||||
|
||||
|
||||
class TestGetUnreferencedUnhashedAssetIds:
    def test_returns_unreferenced_unhashed_assets(self, session: Session):
        # Unhashed asset (hash=None) with no references (no file_path)
        orphan = _make_asset(session, hash_val=None)
        # Unhashed asset with active reference (not unreferenced)
        active_owner = _make_asset(session, hash_val=None)
        _make_reference(session, active_owner, "/has/ref.bin", name="has_ref")
        # Unhashed asset with only missing reference (should be unreferenced)
        missing_owner = _make_asset(session, hash_val=None)
        dead_ref = _make_reference(session, missing_owner, "/missing/ref.bin", name="missing_ref")
        dead_ref.is_missing = True
        # Regular asset (hash not None) - should not be returned
        _make_asset(session, hash_val="blake3:regular")
        session.commit()

        result = set(get_unreferenced_unhashed_asset_ids(session))

        assert orphan.id in result
        assert missing_owner.id in result
        assert active_owner.id not in result
|
||||
|
||||
|
||||
class TestDeleteAssetsByIds:
    def test_deletes_assets_and_references(self, session: Session):
        doomed = _make_asset(session, "hash1")
        _make_reference(session, doomed, "/test/path.bin", name="test")
        session.commit()

        removed = delete_assets_by_ids(session, [doomed.id])
        session.commit()
        assert removed == 1

        # The reference row disappears along with its asset.
        assert session.query(Asset).count() == 0
        assert session.query(AssetReference).count() == 0

    def test_empty_list_deletes_nothing(self, session: Session):
        _make_asset(session, "hash1")
        session.commit()

        assert delete_assets_by_ids(session, []) == 0
        assert session.query(Asset).count() == 1
|
||||
|
||||
|
||||
class TestGetReferencesForPrefixes:
    def test_returns_references_matching_prefix(self, session: Session, tmp_path):
        owner = _make_asset(session, "hash1")
        dir1 = tmp_path / "dir1"
        dir2 = tmp_path / "dir2"
        for directory in (dir1, dir2):
            directory.mkdir()

        path1 = str(dir1 / "file.bin")
        path2 = str(dir2 / "file.bin")
        _make_reference(session, owner, path1, name="file1", mtime_ns=100)
        _make_reference(session, owner, path2, name="file2", mtime_ns=200)
        session.commit()

        # Only the dir1 reference falls under the requested prefix.
        matches = get_references_for_prefixes(session, [str(dir1)])

        assert [r.file_path for r in matches] == [path1]

    def test_empty_prefixes_returns_empty(self, session: Session):
        owner = _make_asset(session, "hash1")
        _make_reference(session, owner, "/some/path.bin")
        session.commit()

        assert get_references_for_prefixes(session, []) == []
|
||||
|
||||
|
||||
class TestBulkSetNeedsVerify:
    def test_sets_needs_verify_flag(self, session: Session):
        owner = _make_asset(session, "hash1")
        first = _make_reference(session, owner, "/path1.bin", needs_verify=False)
        second = _make_reference(session, owner, "/path2.bin", needs_verify=False)
        session.commit()

        touched = bulk_update_needs_verify(session, [first.id, second.id], True)
        session.commit()
        assert touched == 2

        # Re-read both rows to confirm the flag was persisted.
        for link in (first, second):
            session.refresh(link)
            assert link.needs_verify is True

    def test_empty_list_updates_nothing(self, session: Session):
        assert bulk_update_needs_verify(session, [], True) == 0
|
||||
|
||||
|
||||
class TestDeleteReferencesByIds:
    def test_deletes_references_by_id(self, session: Session):
        owner = _make_asset(session, "hash1")
        victim = _make_reference(session, owner, "/path1.bin")
        _make_reference(session, owner, "/path2.bin")
        session.commit()

        removed = delete_references_by_ids(session, [victim.id])
        session.commit()
        assert removed == 1

        # The sibling reference is untouched.
        assert session.query(AssetReference).count() == 1

    def test_empty_list_deletes_nothing(self, session: Session):
        assert delete_references_by_ids(session, []) == 0
|
||||
|
||||
|
||||
class TestDeleteOrphanedSeedAsset:
    @pytest.mark.parametrize(
        "create_asset,expected_deleted,expected_count",
        [
            (True, True, 0),  # Existing asset gets deleted
            (False, False, 0),  # Nonexistent returns False
        ],
        ids=["deletes_existing", "nonexistent_returns_false"],
    )
    def test_delete_orphaned_seed_asset(
        self, session: Session, create_asset, expected_deleted, expected_count
    ):
        target_id = "nonexistent-id"
        if create_asset:
            seed = _make_asset(session, hash_val=None)
            target_id = seed.id
            _make_reference(session, seed, "/test/path.bin", name="test")
            session.commit()

        outcome = delete_orphaned_seed_asset(session, target_id)
        if create_asset:
            session.commit()

        assert outcome is expected_deleted
        assert session.query(Asset).count() == expected_count
|
||||
|
||||
|
||||
class TestBulkInsertReferencesIgnoreConflicts:
    @staticmethod
    def _row(asset_id, file_path, name, mtime_ns, now):
        # Build one insertable reference row with all required timestamp columns.
        return {
            "asset_id": asset_id,
            "file_path": file_path,
            "name": name,
            "mtime_ns": mtime_ns,
            "created_at": now,
            "updated_at": now,
            "last_access_time": now,
        }

    def test_inserts_multiple_references(self, session: Session):
        owner = _make_asset(session, "hash1")
        now = get_utc_now()
        payload = [
            self._row(owner.id, "/bulk1.bin", "bulk1", 100, now),
            self._row(owner.id, "/bulk2.bin", "bulk2", 200, now),
        ]
        bulk_insert_references_ignore_conflicts(session, payload)
        session.commit()

        assert session.query(AssetReference).count() == 2

    def test_ignores_conflicts(self, session: Session):
        owner = _make_asset(session, "hash1")
        _make_reference(session, owner, "/existing.bin", mtime_ns=100)
        session.commit()

        now = get_utc_now()
        payload = [
            self._row(owner.id, "/existing.bin", "existing", 999, now),
            self._row(owner.id, "/new.bin", "new", 200, now),
        ]
        bulk_insert_references_ignore_conflicts(session, payload)
        session.commit()

        assert session.query(AssetReference).count() == 2
        survivor = session.query(AssetReference).filter_by(file_path="/existing.bin").one()
        assert survivor.mtime_ns == 100  # Original value preserved

    def test_empty_list_is_noop(self, session: Session):
        bulk_insert_references_ignore_conflicts(session, [])
        assert session.query(AssetReference).count() == 0
|
||||
|
||||
|
||||
class TestGetReferencesByPathsAndAssetIds:
    def test_returns_matching_paths(self, session: Session):
        first = _make_asset(session, "hash1")
        second = _make_asset(session, "hash2")
        _make_reference(session, first, "/path1.bin")
        _make_reference(session, second, "/path2.bin")
        session.commit()

        lookup = {
            "/path1.bin": first.id,
            "/path2.bin": second.id,
        }
        assert get_references_by_paths_and_asset_ids(session, lookup) == {"/path1.bin", "/path2.bin"}

    def test_excludes_non_matching_asset_ids(self, session: Session):
        first = _make_asset(session, "hash1")
        second = _make_asset(session, "hash2")
        _make_reference(session, first, "/path1.bin")
        session.commit()

        # Path exists but with different asset_id
        assert get_references_by_paths_and_asset_ids(session, {"/path1.bin": second.id}) == set()

    def test_empty_dict_returns_empty(self, session: Session):
        assert get_references_by_paths_and_asset_ids(session, {}) == set()
|
||||
New file (184 lines): tests-unit/assets_test/queries/test_metadata.py
@@ -0,0 +1,184 @@
|
||||
"""Tests for metadata filtering logic in asset_reference queries."""
|
||||
import pytest
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.models import Asset, AssetReference, AssetReferenceMeta
|
||||
from app.assets.database.queries import list_references_page
|
||||
from app.assets.database.queries.asset_reference import convert_metadata_to_rows
|
||||
from app.assets.helpers import get_utc_now
|
||||
|
||||
|
||||
def _make_asset(session: Session, hash_val: str) -> Asset:
    """Insert and flush a minimal Asset row, returning it with its id populated."""
    record = Asset(hash=hash_val, size_bytes=1024)
    session.add(record)
    session.flush()
    return record
|
||||
|
||||
|
||||
def _make_reference(
    session: Session,
    asset: Asset,
    name: str,
    metadata: dict | None = None,
) -> AssetReference:
    """Create a reference for *asset*, mirroring *metadata* into AssetReferenceMeta rows."""
    timestamp = get_utc_now()
    reference = AssetReference(
        owner_id="",
        name=name,
        asset_id=asset.id,
        user_metadata=metadata,
        created_at=timestamp,
        updated_at=timestamp,
        last_access_time=timestamp,
    )
    session.add(reference)
    session.flush()

    if metadata:
        # Denormalize each metadata entry into typed meta rows,
        # the same way the production write path does.
        for key, val in metadata.items():
            for row in convert_metadata_to_rows(key, val):
                session.add(
                    AssetReferenceMeta(
                        asset_reference_id=reference.id,
                        key=row["key"],
                        ordinal=row.get("ordinal", 0),
                        val_str=row.get("val_str"),
                        val_num=row.get("val_num"),
                        val_bool=row.get("val_bool"),
                        val_json=row.get("val_json"),
                    )
                )
        session.flush()

    return reference
|
||||
|
||||
|
||||
class TestMetadataFilterByType:
    """Table-driven tests for metadata filtering by different value types."""

    @pytest.mark.parametrize(
        "match_meta,nomatch_meta,filter_key,filter_val",
        [
            # String matching
            ({"category": "models"}, {"category": "images"}, "category", "models"),
            # Integer matching
            ({"epoch": 5}, {"epoch": 10}, "epoch", 5),
            # Float matching
            ({"score": 0.95}, {"score": 0.5}, "score", 0.95),
            # Boolean True matching
            ({"enabled": True}, {"enabled": False}, "enabled", True),
            # Boolean False matching
            ({"enabled": False}, {"enabled": True}, "enabled", False),
        ],
        ids=["string", "int", "float", "bool_true", "bool_false"],
    )
    def test_filter_matches_correct_value(
        self, session: Session, match_meta, nomatch_meta, filter_key, filter_val
    ):
        """A typed filter value selects only the reference whose stored value equals it."""
        asset = _make_asset(session, "hash1")
        _make_reference(session, asset, "match", match_meta)
        _make_reference(session, asset, "nomatch", nomatch_meta)
        session.commit()

        refs, _, total = list_references_page(
            session, metadata_filter={filter_key: filter_val}
        )
        assert total == 1
        assert refs[0].name == "match"

    @pytest.mark.parametrize(
        "stored_meta,filter_key,filter_val",
        [
            # String no match
            ({"category": "models"}, "category", "other"),
            # Int no match
            ({"epoch": 5}, "epoch", 99),
            # Float no match
            ({"score": 0.5}, "score", 0.99),
        ],
        ids=["string_no_match", "int_no_match", "float_no_match"],
    )
    def test_filter_returns_empty_when_no_match(
        self, session: Session, stored_meta, filter_key, filter_val
    ):
        """A filter that matches nothing yields a zero total."""
        asset = _make_asset(session, "hash1")
        _make_reference(session, asset, "item", stored_meta)
        session.commit()

        # Fix: the page result was previously bound to an unused `refs` name;
        # only `total` is asserted here.
        _, _, total = list_references_page(
            session, metadata_filter={filter_key: filter_val}
        )
        assert total == 0
|
||||
|
||||
|
||||
class TestMetadataFilterNull:
    """Tests for null/missing key filtering."""

    @pytest.mark.parametrize(
        "match_name,match_meta,nomatch_name,nomatch_meta,filter_key",
        [
            # Null matches missing key
            ("missing_key", {}, "has_key", {"optional": "value"}, "optional"),
            # Null matches explicit null
            ("explicit_null", {"nullable": None}, "has_value", {"nullable": "present"}, "nullable"),
        ],
        ids=["missing_key", "explicit_null"],
    )
    def test_null_filter_matches(
        self, session: Session, match_name, match_meta, nomatch_name, nomatch_meta, filter_key
    ):
        # A None filter value must match both "key absent" and "key explicitly null".
        owner = _make_asset(session, "hash1")
        _make_reference(session, owner, match_name, match_meta)
        _make_reference(session, owner, nomatch_name, nomatch_meta)
        session.commit()

        page, _, total = list_references_page(session, metadata_filter={filter_key: None})
        assert total == 1
        assert page[0].name == match_name
|
||||
|
||||
|
||||
class TestMetadataFilterList:
    """Tests for list-based (OR) filtering."""

    def test_filter_by_list_matches_any(self, session: Session):
        """List values should match ANY of the values (OR)."""
        owner = _make_asset(session, "hash1")
        _make_reference(session, owner, "cat_a", {"category": "a"})
        _make_reference(session, owner, "cat_b", {"category": "b"})
        _make_reference(session, owner, "cat_c", {"category": "c"})
        session.commit()

        page, _, total = list_references_page(session, metadata_filter={"category": ["a", "b"]})
        assert total == 2
        assert {r.name for r in page} == {"cat_a", "cat_b"}
|
||||
|
||||
|
||||
class TestMetadataFilterMultipleKeys:
    """Tests for multiple filter keys (AND semantics)."""

    def test_multiple_keys_must_all_match(self, session: Session):
        """Multiple keys should ALL match (AND)."""
        owner = _make_asset(session, "hash1")
        _make_reference(session, owner, "match", {"type": "model", "version": 2})
        _make_reference(session, owner, "wrong_type", {"type": "config", "version": 2})
        _make_reference(session, owner, "wrong_version", {"type": "model", "version": 1})
        session.commit()

        page, _, total = list_references_page(
            session, metadata_filter={"type": "model", "version": 2}
        )
        assert total == 1
        assert page[0].name == "match"
|
||||
|
||||
|
||||
class TestMetadataFilterEmptyDict:
    """Tests for empty filter behavior."""

    def test_empty_filter_returns_all(self, session: Session):
        """An empty metadata_filter must not constrain the page at all."""
        asset = _make_asset(session, "hash1")
        _make_reference(session, asset, "a", {"key": "val"})
        _make_reference(session, asset, "b", {})
        session.commit()

        # Fix: the page result was previously bound to an unused `refs` name;
        # only `total` is asserted here.
        _, _, total = list_references_page(session, metadata_filter={})
        assert total == 2
|
||||
New file (366 lines): tests-unit/assets_test/queries/test_tags.py
@@ -0,0 +1,366 @@
|
||||
import pytest
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.models import Asset, AssetReference, AssetReferenceTag, AssetReferenceMeta, Tag
|
||||
from app.assets.database.queries import (
|
||||
ensure_tags_exist,
|
||||
get_reference_tags,
|
||||
set_reference_tags,
|
||||
add_tags_to_reference,
|
||||
remove_tags_from_reference,
|
||||
add_missing_tag_for_asset_id,
|
||||
remove_missing_tag_for_asset_id,
|
||||
list_tags_with_usage,
|
||||
bulk_insert_tags_and_meta,
|
||||
)
|
||||
from app.assets.helpers import get_utc_now
|
||||
|
||||
|
||||
def _make_asset(session: Session, hash_val: str | None = None) -> Asset:
    """Insert and flush a minimal Asset row; hash may be None for seed assets."""
    record = Asset(hash=hash_val, size_bytes=1024)
    session.add(record)
    session.flush()
    return record
|
||||
|
||||
|
||||
def _make_reference(session: Session, asset: Asset, name: str = "test", owner_id: str = "") -> AssetReference:
    """Insert and flush an AssetReference pointing at *asset*."""
    timestamp = get_utc_now()
    record = AssetReference(
        owner_id=owner_id,
        name=name,
        asset_id=asset.id,
        created_at=timestamp,
        updated_at=timestamp,
        last_access_time=timestamp,
    )
    session.add(record)
    session.flush()
    return record
|
||||
|
||||
|
||||
class TestEnsureTagsExist:
    def test_creates_new_tags(self, session: Session):
        ensure_tags_exist(session, ["alpha", "beta"], tag_type="user")
        session.commit()

        stored = {t.name for t in session.query(Tag).all()}
        assert stored == {"alpha", "beta"}

    def test_is_idempotent(self, session: Session):
        # Calling twice with the same name must not create a duplicate row.
        for _ in range(2):
            ensure_tags_exist(session, ["alpha"], tag_type="user")
        session.commit()

        assert session.query(Tag).count() == 1

    def test_normalizes_tags(self, session: Session):
        # Case/whitespace variants collapse to one canonical tag each.
        ensure_tags_exist(session, [" ALPHA ", "Beta", "alpha"])
        session.commit()

        stored = {t.name for t in session.query(Tag).all()}
        assert stored == {"alpha", "beta"}

    def test_empty_list_is_noop(self, session: Session):
        ensure_tags_exist(session, [])
        session.commit()
        assert session.query(Tag).count() == 0

    def test_tag_type_is_set(self, session: Session):
        ensure_tags_exist(session, ["system-tag"], tag_type="system")
        session.commit()

        stored = session.query(Tag).filter_by(name="system-tag").one()
        assert stored.tag_type == "system"
|
||||
|
||||
|
||||
class TestGetReferenceTags:
    def test_returns_empty_for_no_tags(self, session: Session):
        owner = _make_asset(session, "hash1")
        link = _make_reference(session, owner)

        assert get_reference_tags(session, reference_id=link.id) == []

    def test_returns_tags_for_reference(self, session: Session):
        owner = _make_asset(session, "hash1")
        link = _make_reference(session, owner)

        ensure_tags_exist(session, ["tag1", "tag2"])
        # Attach both tags directly via the association table.
        for tag_name in ("tag1", "tag2"):
            session.add(
                AssetReferenceTag(
                    asset_reference_id=link.id,
                    tag_name=tag_name,
                    origin="manual",
                    added_at=get_utc_now(),
                )
            )
        session.flush()

        assert set(get_reference_tags(session, reference_id=link.id)) == {"tag1", "tag2"}
|
||||
|
||||
|
||||
class TestSetReferenceTags:
    def test_adds_new_tags(self, session: Session):
        owner = _make_asset(session, "hash1")
        link = _make_reference(session, owner)

        outcome = set_reference_tags(session, reference_id=link.id, tags=["a", "b"])
        session.commit()

        assert set(outcome.added) == {"a", "b"}
        assert outcome.removed == []
        assert set(outcome.total) == {"a", "b"}

    def test_removes_old_tags(self, session: Session):
        owner = _make_asset(session, "hash1")
        link = _make_reference(session, owner)

        set_reference_tags(session, reference_id=link.id, tags=["a", "b", "c"])
        outcome = set_reference_tags(session, reference_id=link.id, tags=["a"])
        session.commit()

        assert outcome.added == []
        assert set(outcome.removed) == {"b", "c"}
        assert outcome.total == ["a"]

    def test_replaces_tags(self, session: Session):
        owner = _make_asset(session, "hash1")
        link = _make_reference(session, owner)

        set_reference_tags(session, reference_id=link.id, tags=["a", "b"])
        outcome = set_reference_tags(session, reference_id=link.id, tags=["b", "c"])
        session.commit()

        # Overlap ("b") is kept; "a" is dropped and "c" is introduced.
        assert outcome.added == ["c"]
        assert outcome.removed == ["a"]
        assert set(outcome.total) == {"b", "c"}
|
||||
|
||||
|
||||
class TestAddTagsToReference:
    def test_adds_tags(self, session: Session):
        owner = _make_asset(session, "hash1")
        link = _make_reference(session, owner)

        outcome = add_tags_to_reference(session, reference_id=link.id, tags=["x", "y"])
        session.commit()

        assert set(outcome.added) == {"x", "y"}
        assert outcome.already_present == []

    def test_reports_already_present(self, session: Session):
        owner = _make_asset(session, "hash1")
        link = _make_reference(session, owner)

        add_tags_to_reference(session, reference_id=link.id, tags=["x"])
        outcome = add_tags_to_reference(session, reference_id=link.id, tags=["x", "y"])
        session.commit()

        assert outcome.added == ["y"]
        assert outcome.already_present == ["x"]

    def test_raises_for_missing_reference(self, session: Session):
        with pytest.raises(ValueError, match="not found"):
            add_tags_to_reference(session, reference_id="nonexistent", tags=["x"])
|
||||
|
||||
|
||||
class TestRemoveTagsFromReference:
    def test_removes_tags(self, session: Session):
        owner = _make_asset(session, "hash1")
        link = _make_reference(session, owner)

        add_tags_to_reference(session, reference_id=link.id, tags=["a", "b", "c"])
        outcome = remove_tags_from_reference(session, reference_id=link.id, tags=["a", "b"])
        session.commit()

        assert set(outcome.removed) == {"a", "b"}
        assert outcome.not_present == []
        assert outcome.total_tags == ["c"]

    def test_reports_not_present(self, session: Session):
        owner = _make_asset(session, "hash1")
        link = _make_reference(session, owner)

        add_tags_to_reference(session, reference_id=link.id, tags=["a"])
        outcome = remove_tags_from_reference(session, reference_id=link.id, tags=["a", "x"])
        session.commit()

        assert outcome.removed == ["a"]
        assert outcome.not_present == ["x"]

    def test_raises_for_missing_reference(self, session: Session):
        with pytest.raises(ValueError, match="not found"):
            remove_tags_from_reference(session, reference_id="nonexistent", tags=["x"])
|
||||
|
||||
|
||||
class TestMissingTagFunctions:
    def test_add_missing_tag_for_asset_id(self, session: Session):
        owner = _make_asset(session, "hash1")
        link = _make_reference(session, owner)
        ensure_tags_exist(session, ["missing"], tag_type="system")

        add_missing_tag_for_asset_id(session, asset_id=owner.id)
        session.commit()

        assert "missing" in get_reference_tags(session, reference_id=link.id)

    def test_add_missing_tag_is_idempotent(self, session: Session):
        owner = _make_asset(session, "hash1")
        link = _make_reference(session, owner)
        ensure_tags_exist(session, ["missing"], tag_type="system")

        # Tagging twice must not produce a duplicate association row.
        for _ in range(2):
            add_missing_tag_for_asset_id(session, asset_id=owner.id)
        session.commit()

        rows = session.query(AssetReferenceTag).filter_by(asset_reference_id=link.id, tag_name="missing").all()
        assert len(rows) == 1

    def test_remove_missing_tag_for_asset_id(self, session: Session):
        owner = _make_asset(session, "hash1")
        link = _make_reference(session, owner)
        ensure_tags_exist(session, ["missing"], tag_type="system")
        add_missing_tag_for_asset_id(session, asset_id=owner.id)

        remove_missing_tag_for_asset_id(session, asset_id=owner.id)
        session.commit()

        assert "missing" not in get_reference_tags(session, reference_id=link.id)
|
||||
|
||||
|
||||
class TestListTagsWithUsage:
    """Listing tags together with how many visible references use each one."""

    def test_returns_tags_with_counts(self, session: Session):
        ensure_tags_exist(session, ["used", "unused"])

        asset = _make_asset(session, "hash1")
        ref = _make_reference(session, asset)
        add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
        session.commit()

        rows, total = list_tags_with_usage(session)

        tag_dict = {name: count for name, _, count in rows}
        assert tag_dict["used"] == 1
        assert tag_dict["unused"] == 0
        assert total == 2

    def test_exclude_zero_counts(self, session: Session):
        ensure_tags_exist(session, ["used", "unused"])

        asset = _make_asset(session, "hash1")
        ref = _make_reference(session, asset)
        add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
        session.commit()

        # Fix: `total` was previously bound but never used in this test.
        rows, _ = list_tags_with_usage(session, include_zero=False)

        tag_names = {name for name, _, _ in rows}
        assert "used" in tag_names
        assert "unused" not in tag_names

    def test_prefix_filter(self, session: Session):
        ensure_tags_exist(session, ["alpha", "beta", "alphabet"])
        session.commit()

        # Fix: `total` was previously bound but never used in this test.
        rows, _ = list_tags_with_usage(session, prefix="alph")

        tag_names = {name for name, _, _ in rows}
        assert tag_names == {"alpha", "alphabet"}

    def test_order_by_name(self, session: Session):
        ensure_tags_exist(session, ["zebra", "alpha", "middle"])
        session.commit()

        rows, _ = list_tags_with_usage(session, order="name_asc")

        names = [name for name, _, _ in rows]
        assert names == ["alpha", "middle", "zebra"]

    def test_owner_visibility(self, session: Session):
        ensure_tags_exist(session, ["shared-tag", "owner-tag"])

        asset = _make_asset(session, "hash1")
        shared_ref = _make_reference(session, asset, name="shared", owner_id="")
        owner_ref = _make_reference(session, asset, name="owned", owner_id="user1")

        add_tags_to_reference(session, reference_id=shared_ref.id, tags=["shared-tag"])
        add_tags_to_reference(session, reference_id=owner_ref.id, tags=["owner-tag"])
        session.commit()

        # Empty owner sees only shared
        rows, _ = list_tags_with_usage(session, owner_id="", include_zero=False)
        tag_dict = {name: count for name, _, count in rows}
        assert tag_dict.get("shared-tag", 0) == 1
        assert tag_dict.get("owner-tag", 0) == 0

        # User1 sees both
        rows, _ = list_tags_with_usage(session, owner_id="user1", include_zero=False)
        tag_dict = {name: count for name, _, count in rows}
        assert tag_dict.get("shared-tag", 0) == 1
        assert tag_dict.get("owner-tag", 0) == 1
|
||||
|
||||
|
||||
class TestBulkInsertTagsAndMeta:
    """Tests for the bulk tag-link / metadata insertion helper."""

    def test_inserts_tags(self, session: Session):
        """Tag link rows supplied in bulk end up attached to the reference."""
        backing = _make_asset(session, "hash1")
        ref = _make_reference(session, backing)
        ensure_tags_exist(session, ["bulk-tag1", "bulk-tag2"])
        session.commit()

        stamp = get_utc_now()
        links = [
            {"asset_reference_id": ref.id, "tag_name": tag, "origin": "manual", "added_at": stamp}
            for tag in ("bulk-tag1", "bulk-tag2")
        ]
        bulk_insert_tags_and_meta(session, tag_rows=links, meta_rows=[])
        session.commit()

        assert set(get_reference_tags(session, reference_id=ref.id)) == {"bulk-tag1", "bulk-tag2"}

    def test_inserts_meta(self, session: Session):
        """Metadata rows supplied in bulk are persisted verbatim."""
        backing = _make_asset(session, "hash1")
        ref = _make_reference(session, backing)
        session.commit()

        row = {
            "asset_reference_id": ref.id,
            "key": "meta-key",
            "ordinal": 0,
            "val_str": "meta-value",
            "val_num": None,
            "val_bool": None,
            "val_json": None,
        }
        bulk_insert_tags_and_meta(session, tag_rows=[], meta_rows=[row])
        session.commit()

        stored = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
        assert len(stored) == 1
        assert stored[0].key == "meta-key"
        assert stored[0].val_str == "meta-value"

    def test_ignores_conflicts(self, session: Session):
        """Re-inserting an existing tag link is a no-op that keeps the original row."""
        backing = _make_asset(session, "hash1")
        ref = _make_reference(session, backing)
        ensure_tags_exist(session, ["existing-tag"])
        add_tags_to_reference(session, reference_id=ref.id, tags=["existing-tag"])
        session.commit()

        duplicate = {
            "asset_reference_id": ref.id,
            "tag_name": "existing-tag",
            "origin": "duplicate",
            "added_at": get_utc_now(),
        }
        bulk_insert_tags_and_meta(session, tag_rows=[duplicate], meta_rows=[])
        session.commit()

        links = session.query(AssetReferenceTag).filter_by(asset_reference_id=ref.id, tag_name="existing-tag").all()
        # Exactly one link survives, and its origin stays the original "manual".
        assert len(links) == 1
        assert links[0].origin == "manual"

    def test_empty_lists_is_noop(self, session: Session):
        """Calling with nothing to insert leaves both tables untouched."""
        bulk_insert_tags_and_meta(session, tag_rows=[], meta_rows=[])
        assert session.query(AssetReferenceTag).count() == 0
        assert session.query(AssetReferenceMeta).count() == 0
|
||||
1
tests-unit/assets_test/services/__init__.py
Normal file
1
tests-unit/assets_test/services/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Service layer tests
|
||||
54
tests-unit/assets_test/services/conftest.py
Normal file
54
tests-unit/assets_test/services/conftest.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import tempfile
from pathlib import Path
from unittest.mock import patch

import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import Session

from app.assets.database.models import Base


@pytest.fixture(autouse=True)
def autoclean_unit_test_assets():
    """Override parent autouse fixture - service unit tests don't need server cleanup."""
    yield


@pytest.fixture
def db_engine():
    """In-memory SQLite engine with the schema created, for fast unit tests."""
    eng = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(eng)
    return eng


@pytest.fixture
def session(db_engine):
    """Yield a Session bound to the in-memory engine for direct DB access."""
    with Session(db_engine) as db_session:
        yield db_session


@pytest.fixture
def mock_create_session(db_engine):
    """Redirect every service-layer create_session to the in-memory database."""
    from contextlib import contextmanager
    from sqlalchemy.orm import Session as SASession

    @contextmanager
    def _create_session():
        with SASession(db_engine) as db_session:
            yield db_session

    # Each service module imports create_session by name, so all three
    # references must be patched individually.
    with patch("app.assets.services.ingest.create_session", _create_session), \
            patch("app.assets.services.asset_management.create_session", _create_session), \
            patch("app.assets.services.tagging.create_session", _create_session):
        yield _create_session


@pytest.fixture
def temp_dir():
    """Yield a throwaway directory (as Path) for file operations."""
    with tempfile.TemporaryDirectory() as scratch:
        yield Path(scratch)
|
||||
268
tests-unit/assets_test/services/test_asset_management.py
Normal file
268
tests-unit/assets_test/services/test_asset_management.py
Normal file
@@ -0,0 +1,268 @@
|
||||
"""Tests for asset_management services."""
|
||||
import pytest
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.models import Asset, AssetReference
|
||||
from app.assets.database.queries import ensure_tags_exist, add_tags_to_reference
|
||||
from app.assets.helpers import get_utc_now
|
||||
from app.assets.services import (
|
||||
get_asset_detail,
|
||||
update_asset_metadata,
|
||||
delete_asset_reference,
|
||||
set_asset_preview,
|
||||
)
|
||||
|
||||
|
||||
def _make_asset(session: Session, hash_val: str = "blake3:test", size: int = 1024) -> Asset:
    """Insert and flush a minimal Asset row, returning the persisted instance."""
    row = Asset(hash=hash_val, size_bytes=size, mime_type="application/octet-stream")
    session.add(row)
    session.flush()
    return row


def _make_reference(
    session: Session,
    asset: Asset,
    name: str = "test",
    owner_id: str = "",
) -> AssetReference:
    """Insert and flush an AssetReference pointing at *asset*, with all timestamps set to now."""
    stamp = get_utc_now()
    row = AssetReference(
        owner_id=owner_id,
        name=name,
        asset_id=asset.id,
        created_at=stamp,
        updated_at=stamp,
        last_access_time=stamp,
    )
    session.add(row)
    session.flush()
    return row
|
||||
|
||||
|
||||
class TestGetAssetDetail:
    """Read path: fetching a single asset reference together with its tags."""

    def test_returns_none_for_nonexistent(self, mock_create_session):
        """An unknown reference id resolves to None rather than raising."""
        assert get_asset_detail(reference_id="nonexistent") is None

    def test_returns_asset_with_tags(self, mock_create_session, session: Session):
        """The detail payload carries the reference, the asset, and the tag set."""
        backing = _make_asset(session)
        ref = _make_reference(session, backing, name="test.bin")
        ensure_tags_exist(session, ["alpha", "beta"])
        add_tags_to_reference(session, reference_id=ref.id, tags=["alpha", "beta"])
        session.commit()

        detail = get_asset_detail(reference_id=ref.id)

        assert detail is not None
        assert detail.ref.id == ref.id
        assert detail.asset.hash == backing.hash
        assert set(detail.tags) == {"alpha", "beta"}

    def test_respects_owner_visibility(self, mock_create_session, session: Session):
        """A reference owned by one user is invisible to another."""
        backing = _make_asset(session)
        ref = _make_reference(session, backing, owner_id="user1")
        session.commit()

        # The wrong owner cannot see it...
        assert get_asset_detail(reference_id=ref.id, owner_id="user2") is None
        # ...but the right owner can.
        assert get_asset_detail(reference_id=ref.id, owner_id="user1") is not None
|
||||
|
||||
|
||||
class TestUpdateAssetMetadata:
    """Write path: renaming, retagging, and user-metadata updates."""

    def test_updates_name(self, mock_create_session, session: Session):
        """A name change is persisted to the reference row."""
        backing = _make_asset(session)
        ref_id = _make_reference(session, backing, name="old_name.bin").id
        session.commit()

        update_asset_metadata(reference_id=ref_id, name="new_name.bin")

        # Re-fetch from the DB so we see the committed write, not a cached object.
        session.expire_all()
        assert session.get(AssetReference, ref_id).name == "new_name.bin"

    def test_updates_tags(self, mock_create_session, session: Session):
        """Supplying tags replaces the previous tag set wholesale."""
        backing = _make_asset(session)
        ref = _make_reference(session, backing)
        ensure_tags_exist(session, ["old"])
        add_tags_to_reference(session, reference_id=ref.id, tags=["old"])
        session.commit()

        outcome = update_asset_metadata(reference_id=ref.id, tags=["new1", "new2"])

        assert set(outcome.tags) == {"new1", "new2"}
        assert "old" not in outcome.tags

    def test_updates_user_metadata(self, mock_create_session, session: Session):
        """Arbitrary user metadata round-trips through the update call."""
        backing = _make_asset(session)
        ref_id = _make_reference(session, backing).id
        session.commit()

        update_asset_metadata(reference_id=ref_id, user_metadata={"key": "value", "num": 42})

        session.expire_all()
        stored = session.get(AssetReference, ref_id).user_metadata
        assert stored["key"] == "value"
        assert stored["num"] == 42

    def test_raises_for_nonexistent(self, mock_create_session):
        """Updating a missing reference raises ValueError."""
        with pytest.raises(ValueError, match="not found"):
            update_asset_metadata(reference_id="nonexistent", name="fail")

    def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
        """Updating someone else's reference raises PermissionError."""
        backing = _make_asset(session)
        ref = _make_reference(session, backing, owner_id="user1")
        session.commit()

        with pytest.raises(PermissionError, match="not owner"):
            update_asset_metadata(reference_id=ref.id, name="new", owner_id="user2")
|
||||
|
||||
|
||||
class TestDeleteAssetReference:
    """Delete path: soft-deletion of references and orphaned-content cleanup."""

    def test_soft_deletes_reference(self, mock_create_session, session: Session):
        """Deleting without content removal marks the row instead of dropping it."""
        backing = _make_asset(session)
        ref_id = _make_reference(session, backing).id
        session.commit()

        deleted = delete_asset_reference(
            reference_id=ref_id,
            owner_id="",
            delete_content_if_orphan=False,
        )

        assert deleted is True
        # The row survives as a tombstone with deleted_at populated.
        session.expire_all()
        tombstone = session.get(AssetReference, ref_id)
        assert tombstone is not None
        assert tombstone.deleted_at is not None

    def test_returns_false_for_nonexistent(self, mock_create_session):
        """Deleting an unknown reference reports failure instead of raising."""
        assert delete_asset_reference(reference_id="nonexistent", owner_id="") is False

    def test_returns_false_for_wrong_owner(self, mock_create_session, session: Session):
        """A non-owner cannot delete; the row must remain untouched."""
        backing = _make_asset(session)
        ref_id = _make_reference(session, backing, owner_id="user1").id
        session.commit()

        assert delete_asset_reference(reference_id=ref_id, owner_id="user2") is False
        assert session.get(AssetReference, ref_id) is not None

    def test_keeps_asset_if_other_references_exist(self, mock_create_session, session: Session):
        """Content is retained while any other reference still points at it."""
        backing = _make_asset(session)
        first = _make_reference(session, backing, name="ref1")
        _make_reference(session, backing, name="ref2")  # Second ref keeps asset alive
        asset_id = backing.id
        session.commit()

        delete_asset_reference(
            reference_id=first.id,
            owner_id="",
            delete_content_if_orphan=True,
        )

        assert session.get(Asset, asset_id) is not None

    def test_deletes_orphaned_asset(self, mock_create_session, session: Session):
        """Deleting the last reference with orphan cleanup removes the asset too."""
        backing = _make_asset(session)
        ref = _make_reference(session, backing)
        asset_id, ref_id = backing.id, ref.id
        session.commit()

        delete_asset_reference(
            reference_id=ref_id,
            owner_id="",
            delete_content_if_orphan=True,
        )

        assert session.get(AssetReference, ref_id) is None
        assert session.get(Asset, asset_id) is None
|
||||
|
||||
|
||||
class TestSetAssetPreview:
    """Preview assignment: setting, clearing, and permission checks."""

    def test_sets_preview(self, mock_create_session, session: Session):
        """Assigning a preview asset stores its id on the reference."""
        main_asset = _make_asset(session, hash_val="blake3:main")
        thumb = _make_asset(session, hash_val="blake3:preview")
        ref_id = _make_reference(session, main_asset).id
        thumb_id = thumb.id
        session.commit()

        set_asset_preview(reference_id=ref_id, preview_asset_id=thumb_id)

        # Re-fetch from the DB to confirm the committed write.
        session.expire_all()
        assert session.get(AssetReference, ref_id).preview_id == thumb_id

    def test_clears_preview(self, mock_create_session, session: Session):
        """Passing None removes an existing preview association."""
        main_asset = _make_asset(session)
        thumb = _make_asset(session, hash_val="blake3:preview")
        ref = _make_reference(session, main_asset)
        ref.preview_id = thumb.id
        ref_id = ref.id
        session.commit()

        set_asset_preview(reference_id=ref_id, preview_asset_id=None)

        session.expire_all()
        assert session.get(AssetReference, ref_id).preview_id is None

    def test_raises_for_nonexistent_ref(self, mock_create_session):
        """An unknown reference raises ValueError."""
        with pytest.raises(ValueError, match="not found"):
            set_asset_preview(reference_id="nonexistent")

    def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
        """A non-owner cannot touch the preview."""
        backing = _make_asset(session)
        ref = _make_reference(session, backing, owner_id="user1")
        session.commit()

        with pytest.raises(PermissionError, match="not owner"):
            set_asset_preview(reference_id=ref.id, preview_asset_id=None, owner_id="user2")
|
||||
137
tests-unit/assets_test/services/test_bulk_ingest.py
Normal file
137
tests-unit/assets_test/services/test_bulk_ingest.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""Tests for bulk ingest services."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.models import Asset, AssetReference
|
||||
from app.assets.services.bulk_ingest import SeedAssetSpec, batch_insert_seed_assets
|
||||
|
||||
|
||||
class TestBatchInsertSeedAssets:
    """Tests for batch_insert_seed_assets, focused on mime_type persistence."""

    @staticmethod
    def _spec(
        file_path: Path,
        size: int,
        name: str,
        mime_type: str | None,
        tags: list[str] | None = None,
    ) -> SeedAssetSpec:
        """Build a SeedAssetSpec for *file_path* with the fixed fields every test shares."""
        return {
            "abs_path": str(file_path),
            "size_bytes": size,
            "mtime_ns": 1234567890000000000,
            "info_name": name,
            "tags": tags if tags is not None else [],
            "fname": file_path.name,
            "metadata": None,
            "hash": None,
            "mime_type": mime_type,
        }

    def test_populates_mime_type_for_model_files(self, session: Session, temp_dir: Path):
        """Verify mime_type is stored in the Asset table for model files."""
        file_path = temp_dir / "model.safetensors"
        file_path.write_bytes(b"fake safetensors content")

        specs: list[SeedAssetSpec] = [
            self._spec(file_path, 24, "Test Model", "application/safetensors", tags=["models"])
        ]

        result = batch_insert_seed_assets(session, specs=specs, owner_id="")

        assert result.inserted_refs == 1

        # Verify the Asset row carries the supplied mime_type.
        assets = session.query(Asset).all()
        assert len(assets) == 1
        assert assets[0].mime_type == "application/safetensors"

    def test_mime_type_none_when_not_provided(self, session: Session, temp_dir: Path):
        """Verify mime_type is None when not provided in the spec."""
        file_path = temp_dir / "unknown.bin"
        file_path.write_bytes(b"binary data")

        specs: list[SeedAssetSpec] = [self._spec(file_path, 11, "Unknown File", None)]

        result = batch_insert_seed_assets(session, specs=specs, owner_id="")

        assert result.inserted_refs == 1

        assets = session.query(Asset).all()
        assert len(assets) == 1
        assert assets[0].mime_type is None

    def test_various_model_mime_types(self, session: Session, temp_dir: Path):
        """Verify various model file extensions get their expected mime_type."""
        test_cases = [
            ("model.safetensors", "application/safetensors"),
            ("model.pt", "application/pytorch"),
            ("model.ckpt", "application/pickle"),
            ("model.gguf", "application/gguf"),
        ]

        specs: list[SeedAssetSpec] = []
        for filename, mime_type in test_cases:
            file_path = temp_dir / filename
            file_path.write_bytes(b"content")
            specs.append(self._spec(file_path, 7, filename, mime_type))

        result = batch_insert_seed_assets(session, specs=specs, owner_id="")

        assert result.inserted_refs == len(test_cases)

        for filename, expected_mime in test_cases:
            ref = session.query(AssetReference).filter_by(name=filename).first()
            assert ref is not None
            asset = session.query(Asset).filter_by(id=ref.asset_id).first()
            # Fixed: the failure message now names the offending file instead of
            # the garbled "(unknown)" placeholder the original carried.
            assert asset.mime_type == expected_mime, f"Expected {expected_mime} for {filename}, got {asset.mime_type}"
|
||||
|
||||
|
||||
class TestMetadataExtraction:
    """Tests for extract_file_metadata's content-type detection."""

    def test_extracts_mime_type_for_model_files(self, temp_dir: Path):
        """Verify metadata extraction returns correct mime_type for model files."""
        from app.assets.services.metadata_extract import extract_file_metadata

        file_path = temp_dir / "model.safetensors"
        file_path.write_bytes(b"fake safetensors content")

        meta = extract_file_metadata(str(file_path))

        assert meta.content_type == "application/safetensors"

    def test_mime_type_for_various_model_formats(self, temp_dir: Path):
        """Verify various model file extensions map to the correct mime_type."""
        from app.assets.services.metadata_extract import extract_file_metadata

        test_cases = [
            ("model.safetensors", "application/safetensors"),
            ("model.sft", "application/safetensors"),
            ("model.pt", "application/pytorch"),
            ("model.pth", "application/pytorch"),
            ("model.ckpt", "application/pickle"),
            ("model.pkl", "application/pickle"),
            ("model.gguf", "application/gguf"),
        ]

        for filename, expected_mime in test_cases:
            file_path = temp_dir / filename
            file_path.write_bytes(b"content")

            meta = extract_file_metadata(str(file_path))

            # Fixed: the failure message now names the offending file instead of
            # the garbled "(unknown)" placeholder the original carried.
            assert meta.content_type == expected_mime, f"Expected {expected_mime} for {filename}, got {meta.content_type}"
|
||||
207
tests-unit/assets_test/services/test_enrich.py
Normal file
207
tests-unit/assets_test/services/test_enrich.py
Normal file
@@ -0,0 +1,207 @@
|
||||
"""Tests for asset enrichment (mime_type and hash population)."""
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.models import Asset, AssetReference
|
||||
from app.assets.scanner import (
|
||||
ENRICHMENT_HASHED,
|
||||
ENRICHMENT_METADATA,
|
||||
ENRICHMENT_STUB,
|
||||
enrich_asset,
|
||||
)
|
||||
|
||||
|
||||
def _create_stub_asset(
    session: Session,
    file_path: str,
    asset_id: str = "test-asset-id",
    reference_id: str = "test-ref-id",
    name: str | None = None,
) -> tuple[Asset, AssetReference]:
    """Create a stub (unhashed, mime-less) asset plus a reference for enrichment tests."""
    stub = Asset(id=asset_id, hash=None, size_bytes=100, mime_type=None)
    session.add(stub)
    session.flush()

    reference = AssetReference(
        id=reference_id,
        asset_id=asset_id,
        # Fall back to a derived name when none (or an empty one) is given.
        name=name or f"test-asset-{asset_id}",
        owner_id="system",
        file_path=file_path,
        mtime_ns=1234567890000000000,
        enrichment_level=ENRICHMENT_STUB,
    )
    session.add(reference)
    session.flush()

    return stub, reference
|
||||
|
||||
|
||||
class TestEnrichAsset:
    """Tests for enrich_asset: metadata extraction, hashing, and duplicate merge."""

    def test_extracts_mime_type_and_updates_asset(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify mime_type is written to the Asset table during enrichment."""
        target = temp_dir / "model.safetensors"
        target.write_bytes(b"\x00" * 100)

        stub, reference = _create_stub_asset(session, str(target), "asset-1", "ref-1")
        session.commit()

        level = enrich_asset(
            session,
            file_path=str(target),
            reference_id=reference.id,
            asset_id=stub.id,
            extract_metadata=True,
            compute_hash=False,
        )
        assert level == ENRICHMENT_METADATA

        session.expire_all()
        refreshed = session.get(Asset, "asset-1")
        assert refreshed is not None
        assert refreshed.mime_type == "application/safetensors"

    def test_computes_hash_and_updates_asset(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify hash is written to the Asset table during enrichment."""
        target = temp_dir / "data.bin"
        target.write_bytes(b"test content for hashing")

        stub, reference = _create_stub_asset(session, str(target), "asset-2", "ref-2")
        session.commit()

        level = enrich_asset(
            session,
            file_path=str(target),
            reference_id=reference.id,
            asset_id=stub.id,
            extract_metadata=True,
            compute_hash=True,
        )
        assert level == ENRICHMENT_HASHED

        session.expire_all()
        refreshed = session.get(Asset, "asset-2")
        assert refreshed is not None
        assert refreshed.hash is not None
        assert refreshed.hash.startswith("blake3:")

    def test_enrichment_updates_both_mime_and_hash(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify both mime_type and hash are set when full enrichment runs."""
        target = temp_dir / "model.safetensors"
        target.write_bytes(b"\x00" * 50)

        stub, reference = _create_stub_asset(session, str(target), "asset-3", "ref-3")
        session.commit()

        enrich_asset(
            session,
            file_path=str(target),
            reference_id=reference.id,
            asset_id=stub.id,
            extract_metadata=True,
            compute_hash=True,
        )

        session.expire_all()
        refreshed = session.get(Asset, "asset-3")
        assert refreshed is not None
        assert refreshed.mime_type == "application/safetensors"
        assert refreshed.hash is not None
        assert refreshed.hash.startswith("blake3:")

    def test_missing_file_returns_stub_level(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify missing files don't cause errors and leave the asset at STUB level."""
        target = temp_dir / "nonexistent.bin"

        stub, reference = _create_stub_asset(session, str(target), "asset-4", "ref-4")
        session.commit()

        level = enrich_asset(
            session,
            file_path=str(target),
            reference_id=reference.id,
            asset_id=stub.id,
            extract_metadata=True,
            compute_hash=True,
        )
        assert level == ENRICHMENT_STUB

        session.expire_all()
        refreshed = session.get(Asset, "asset-4")
        assert refreshed.mime_type is None
        assert refreshed.hash is None

    def test_duplicate_hash_merges_into_existing_asset(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify duplicate files merge into the existing asset instead of failing."""
        payload = b"identical content"
        first_file = temp_dir / "file1.bin"
        second_file = temp_dir / "file2.bin"
        first_file.write_bytes(payload)
        second_file.write_bytes(payload)

        stub1, ref1 = _create_stub_asset(session, str(first_file), "asset-dup-1", "ref-dup-1")
        stub2, ref2 = _create_stub_asset(session, str(second_file), "asset-dup-2", "ref-dup-2")
        session.commit()

        # Enrich both; the second has the same content hash as the first.
        for path, reference, stub in ((first_file, ref1, stub1), (second_file, ref2, stub2)):
            enrich_asset(
                session,
                file_path=str(path),
                reference_id=reference.id,
                asset_id=stub.id,
                extract_metadata=True,
                compute_hash=True,
            )

        session.expire_all()

        # The first asset is hashed and kept...
        survivor = session.get(Asset, "asset-dup-1")
        assert survivor is not None
        assert survivor.hash is not None

        # ...the duplicate asset row is removed...
        assert session.get(Asset, "asset-dup-2") is None

        # ...and its reference now points at the surviving asset.
        merged_ref = session.get(AssetReference, "ref-dup-2")
        assert merged_ref is not None
        assert merged_ref.asset_id == "asset-dup-1"
|
||||
229
tests-unit/assets_test/services/test_ingest.py
Normal file
229
tests-unit/assets_test/services/test_ingest.py
Normal file
@@ -0,0 +1,229 @@
|
||||
"""Tests for ingest services."""
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.models import Asset, AssetReference, Tag
|
||||
from app.assets.database.queries import get_reference_tags
|
||||
from app.assets.services.ingest import _ingest_file_from_path, _register_existing_asset
|
||||
|
||||
|
||||
class TestIngestFileFromPath:
    """Tests for _ingest_file_from_path: asset/reference creation, tags, upserts."""

    def test_creates_asset_and_reference(self, mock_create_session, temp_dir: Path, session: Session):
        """A fresh path yields one new Asset row and one new AssetReference."""
        target = temp_dir / "test_file.bin"
        target.write_bytes(b"test content")

        outcome = _ingest_file_from_path(
            abs_path=str(target),
            asset_hash="blake3:abc123",
            size_bytes=12,
            mtime_ns=1234567890000000000,
            mime_type="application/octet-stream",
        )

        assert outcome.asset_created is True
        assert outcome.ref_created is True
        assert outcome.reference_id is not None

        # Inspect the rows that actually landed in the database.
        stored_assets = session.query(Asset).all()
        assert len(stored_assets) == 1
        assert stored_assets[0].hash == "blake3:abc123"

        stored_refs = session.query(AssetReference).all()
        assert len(stored_refs) == 1
        assert stored_refs[0].file_path == str(target)

    def test_creates_reference_when_name_provided(self, mock_create_session, temp_dir: Path, session: Session):
        """info_name and owner_id propagate onto the created reference."""
        target = temp_dir / "model.safetensors"
        target.write_bytes(b"model data")

        outcome = _ingest_file_from_path(
            abs_path=str(target),
            asset_hash="blake3:def456",
            size_bytes=10,
            mtime_ns=1234567890000000000,
            mime_type="application/octet-stream",
            info_name="My Model",
            owner_id="user1",
        )

        assert outcome.asset_created is True
        assert outcome.reference_id is not None

        stored = session.query(AssetReference).first()
        assert stored is not None
        assert stored.name == "My Model"
        assert stored.owner_id == "user1"

    def test_creates_tags_when_provided(self, mock_create_session, temp_dir: Path, session: Session):
        """Requested tags are created and linked to the new reference."""
        target = temp_dir / "tagged.bin"
        target.write_bytes(b"data")

        outcome = _ingest_file_from_path(
            abs_path=str(target),
            asset_hash="blake3:ghi789",
            size_bytes=4,
            mtime_ns=1234567890000000000,
            info_name="Tagged Asset",
            tags=["models", "checkpoints"],
        )

        assert outcome.reference_id is not None

        # Both tag rows exist and both are linked to the reference.
        all_tag_names = {t.name for t in session.query(Tag).all()}
        assert "models" in all_tag_names
        assert "checkpoints" in all_tag_names

        linked = get_reference_tags(session, reference_id=outcome.reference_id)
        assert set(linked) == {"models", "checkpoints"}

    def test_idempotent_upsert(self, mock_create_session, temp_dir: Path, session: Session):
        """Re-ingesting the same hash updates the reference instead of duplicating."""
        target = temp_dir / "dup.bin"
        target.write_bytes(b"content")

        first = _ingest_file_from_path(
            abs_path=str(target),
            asset_hash="blake3:repeat",
            size_bytes=7,
            mtime_ns=1234567890000000000,
        )
        assert first.asset_created is True

        # Same hash with a newer mtime: should update, not create.
        second = _ingest_file_from_path(
            abs_path=str(target),
            asset_hash="blake3:repeat",
            size_bytes=7,
            mtime_ns=1234567890000000001,
        )
        assert second.asset_created is False
        assert second.ref_created is False
        assert second.ref_updated is True

        # Still only one asset row.
        assert len(session.query(Asset).all()) == 1

    def test_validates_preview_id(self, mock_create_session, temp_dir: Path, session: Session):
        """A preview id pointing at a real asset is stored on the reference."""
        target = temp_dir / "with_preview.bin"
        target.write_bytes(b"data")

        # The preview asset must exist before ingest references it.
        thumb = Asset(hash="blake3:preview", size_bytes=100)
        session.add(thumb)
        session.commit()
        thumb_id = thumb.id

        outcome = _ingest_file_from_path(
            abs_path=str(target),
            asset_hash="blake3:main",
            size_bytes=4,
            mtime_ns=1234567890000000000,
            info_name="With Preview",
            preview_id=thumb_id,
        )

        assert outcome.reference_id is not None
        stored = session.query(AssetReference).filter_by(id=outcome.reference_id).first()
        assert stored.preview_id == thumb_id

    def test_invalid_preview_id_is_cleared(self, mock_create_session, temp_dir: Path, session: Session):
        """A preview id that matches no asset is silently dropped."""
        target = temp_dir / "bad_preview.bin"
        target.write_bytes(b"data")

        outcome = _ingest_file_from_path(
            abs_path=str(target),
            asset_hash="blake3:badpreview",
            size_bytes=4,
            mtime_ns=1234567890000000000,
            info_name="Bad Preview",
            preview_id="nonexistent-uuid",
        )

        assert outcome.reference_id is not None
        stored = session.query(AssetReference).filter_by(id=outcome.reference_id).first()
        assert stored.preview_id is None
|
||||
|
||||
|
||||
class TestRegisterExistingAsset:
    """Tests for _register_existing_asset: new references onto stored content."""

    def test_creates_reference_for_existing_asset(self, mock_create_session, session: Session):
        """Registering a known hash creates a new reference with tags applied."""
        stored = Asset(hash="blake3:existing", size_bytes=1024, mime_type="image/png")
        session.add(stored)
        session.commit()

        outcome = _register_existing_asset(
            asset_hash="blake3:existing",
            name="Registered Asset",
            user_metadata={"key": "value"},
            tags=["models"],
        )

        assert outcome.created is True
        assert "models" in outcome.tags

        # Re-fetch from the DB to confirm exactly one reference was written.
        session.expire_all()
        matches = session.query(AssetReference).filter_by(name="Registered Asset").all()
        assert len(matches) == 1

    def test_creates_new_reference_even_with_same_name(self, mock_create_session, session: Session):
        """Duplicate names are allowed: registration always adds a reference."""
        stored = Asset(hash="blake3:withref", size_bytes=512)
        session.add(stored)
        session.flush()

        from app.assets.helpers import get_utc_now
        stamp = get_utc_now()
        existing = AssetReference(
            owner_id="",
            name="Existing Ref",
            asset_id=stored.id,
            created_at=stamp,
            updated_at=stamp,
            last_access_time=stamp,
        )
        session.add(existing)
        session.flush()
        existing_id = existing.id
        session.commit()

        outcome = _register_existing_asset(
            asset_hash="blake3:withref",
            name="Existing Ref",
            owner_id="",
        )

        # Multiple references may share a name.
        assert outcome.created is True

        session.expire_all()
        matches = session.query(AssetReference).filter_by(name="Existing Ref").all()
        assert len(matches) == 2
        assert existing_id in {r.id for r in matches}

    def test_raises_for_nonexistent_hash(self, mock_create_session):
        """An unknown hash cannot be registered."""
        with pytest.raises(ValueError, match="No asset with hash"):
            _register_existing_asset(asset_hash="blake3:doesnotexist", name="Fail")

    def test_applies_tags_to_new_reference(self, mock_create_session, session: Session):
        """All requested tags appear on the registration result."""
        stored = Asset(hash="blake3:tagged", size_bytes=256)
        session.add(stored)
        session.commit()

        outcome = _register_existing_asset(
            asset_hash="blake3:tagged",
            name="Tagged Ref",
            tags=["alpha", "beta"],
        )

        assert outcome.created is True
        assert set(outcome.tags) == {"alpha", "beta"}
|
||||
197
tests-unit/assets_test/services/test_tagging.py
Normal file
197
tests-unit/assets_test/services/test_tagging.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""Tests for tagging services."""
|
||||
import pytest
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.models import Asset, AssetReference
|
||||
from app.assets.database.queries import ensure_tags_exist, add_tags_to_reference
|
||||
from app.assets.helpers import get_utc_now
|
||||
from app.assets.services import apply_tags, remove_tags, list_tags
|
||||
|
||||
|
||||
def _make_asset(session: Session, hash_val: str = "blake3:test") -> Asset:
|
||||
asset = Asset(hash=hash_val, size_bytes=1024)
|
||||
session.add(asset)
|
||||
session.flush()
|
||||
return asset
|
||||
|
||||
|
||||
def _make_reference(
|
||||
session: Session,
|
||||
asset: Asset,
|
||||
name: str = "test",
|
||||
owner_id: str = "",
|
||||
) -> AssetReference:
|
||||
now = get_utc_now()
|
||||
ref = AssetReference(
|
||||
owner_id=owner_id,
|
||||
name=name,
|
||||
asset_id=asset.id,
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
last_access_time=now,
|
||||
)
|
||||
session.add(ref)
|
||||
session.flush()
|
||||
return ref
|
||||
|
||||
|
||||
class TestApplyTags:
|
||||
def test_adds_new_tags(self, mock_create_session, session: Session):
|
||||
asset = _make_asset(session)
|
||||
ref = _make_reference(session, asset)
|
||||
session.commit()
|
||||
|
||||
result = apply_tags(
|
||||
reference_id=ref.id,
|
||||
tags=["alpha", "beta"],
|
||||
)
|
||||
|
||||
assert set(result.added) == {"alpha", "beta"}
|
||||
assert result.already_present == []
|
||||
assert set(result.total_tags) == {"alpha", "beta"}
|
||||
|
||||
def test_reports_already_present(self, mock_create_session, session: Session):
|
||||
asset = _make_asset(session)
|
||||
ref = _make_reference(session, asset)
|
||||
ensure_tags_exist(session, ["existing"])
|
||||
add_tags_to_reference(session, reference_id=ref.id, tags=["existing"])
|
||||
session.commit()
|
||||
|
||||
result = apply_tags(
|
||||
reference_id=ref.id,
|
||||
tags=["existing", "new"],
|
||||
)
|
||||
|
||||
assert result.added == ["new"]
|
||||
assert result.already_present == ["existing"]
|
||||
|
||||
def test_raises_for_nonexistent_ref(self, mock_create_session):
|
||||
with pytest.raises(ValueError, match="not found"):
|
||||
apply_tags(reference_id="nonexistent", tags=["x"])
|
||||
|
||||
def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
|
||||
asset = _make_asset(session)
|
||||
ref = _make_reference(session, asset, owner_id="user1")
|
||||
session.commit()
|
||||
|
||||
with pytest.raises(PermissionError, match="not owner"):
|
||||
apply_tags(
|
||||
reference_id=ref.id,
|
||||
tags=["new"],
|
||||
owner_id="user2",
|
||||
)
|
||||
|
||||
|
||||
class TestRemoveTags:
|
||||
def test_removes_tags(self, mock_create_session, session: Session):
|
||||
asset = _make_asset(session)
|
||||
ref = _make_reference(session, asset)
|
||||
ensure_tags_exist(session, ["a", "b", "c"])
|
||||
add_tags_to_reference(session, reference_id=ref.id, tags=["a", "b", "c"])
|
||||
session.commit()
|
||||
|
||||
result = remove_tags(
|
||||
reference_id=ref.id,
|
||||
tags=["a", "b"],
|
||||
)
|
||||
|
||||
assert set(result.removed) == {"a", "b"}
|
||||
assert result.not_present == []
|
||||
assert result.total_tags == ["c"]
|
||||
|
||||
def test_reports_not_present(self, mock_create_session, session: Session):
|
||||
asset = _make_asset(session)
|
||||
ref = _make_reference(session, asset)
|
||||
ensure_tags_exist(session, ["present"])
|
||||
add_tags_to_reference(session, reference_id=ref.id, tags=["present"])
|
||||
session.commit()
|
||||
|
||||
result = remove_tags(
|
||||
reference_id=ref.id,
|
||||
tags=["present", "absent"],
|
||||
)
|
||||
|
||||
assert result.removed == ["present"]
|
||||
assert result.not_present == ["absent"]
|
||||
|
||||
def test_raises_for_nonexistent_ref(self, mock_create_session):
|
||||
with pytest.raises(ValueError, match="not found"):
|
||||
remove_tags(reference_id="nonexistent", tags=["x"])
|
||||
|
||||
def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
|
||||
asset = _make_asset(session)
|
||||
ref = _make_reference(session, asset, owner_id="user1")
|
||||
session.commit()
|
||||
|
||||
with pytest.raises(PermissionError, match="not owner"):
|
||||
remove_tags(
|
||||
reference_id=ref.id,
|
||||
tags=["x"],
|
||||
owner_id="user2",
|
||||
)
|
||||
|
||||
|
||||
class TestListTags:
|
||||
def test_returns_tags_with_counts(self, mock_create_session, session: Session):
|
||||
ensure_tags_exist(session, ["used", "unused"])
|
||||
asset = _make_asset(session)
|
||||
ref = _make_reference(session, asset)
|
||||
add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
|
||||
session.commit()
|
||||
|
||||
rows, total = list_tags()
|
||||
|
||||
tag_dict = {name: count for name, _, count in rows}
|
||||
assert tag_dict["used"] == 1
|
||||
assert tag_dict["unused"] == 0
|
||||
assert total == 2
|
||||
|
||||
def test_excludes_zero_counts(self, mock_create_session, session: Session):
|
||||
ensure_tags_exist(session, ["used", "unused"])
|
||||
asset = _make_asset(session)
|
||||
ref = _make_reference(session, asset)
|
||||
add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
|
||||
session.commit()
|
||||
|
||||
rows, total = list_tags(include_zero=False)
|
||||
|
||||
tag_names = {name for name, _, _ in rows}
|
||||
assert "used" in tag_names
|
||||
assert "unused" not in tag_names
|
||||
|
||||
def test_prefix_filter(self, mock_create_session, session: Session):
|
||||
ensure_tags_exist(session, ["alpha", "beta", "alphabet"])
|
||||
session.commit()
|
||||
|
||||
rows, _ = list_tags(prefix="alph")
|
||||
|
||||
tag_names = {name for name, _, _ in rows}
|
||||
assert tag_names == {"alpha", "alphabet"}
|
||||
|
||||
def test_order_by_name(self, mock_create_session, session: Session):
|
||||
ensure_tags_exist(session, ["zebra", "alpha", "middle"])
|
||||
session.commit()
|
||||
|
||||
rows, _ = list_tags(order="name_asc")
|
||||
|
||||
names = [name for name, _, _ in rows]
|
||||
assert names == ["alpha", "middle", "zebra"]
|
||||
|
||||
def test_pagination(self, mock_create_session, session: Session):
|
||||
ensure_tags_exist(session, ["a", "b", "c", "d", "e"])
|
||||
session.commit()
|
||||
|
||||
rows, total = list_tags(limit=2, offset=1, order="name_asc")
|
||||
|
||||
assert total == 5
|
||||
assert len(rows) == 2
|
||||
names = [name for name, _, _ in rows]
|
||||
assert names == ["b", "c"]
|
||||
|
||||
def test_clamps_limit(self, mock_create_session, session: Session):
|
||||
ensure_tags_exist(session, ["a"])
|
||||
session.commit()
|
||||
|
||||
# Service should clamp limit to max 1000
|
||||
rows, _ = list_tags(limit=2000)
|
||||
assert len(rows) <= 1000
|
||||
@@ -4,7 +4,7 @@ from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from conftest import get_asset_filename, trigger_sync_seed_assets
|
||||
from helpers import get_asset_filename, trigger_sync_seed_assets
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from conftest import get_asset_filename, trigger_sync_seed_assets
|
||||
from helpers import get_asset_filename, trigger_sync_seed_assets
|
||||
|
||||
|
||||
def test_create_from_hash_success(
|
||||
@@ -24,11 +24,11 @@ def test_create_from_hash_success(
|
||||
assert b1["created_new"] is False
|
||||
aid = b1["id"]
|
||||
|
||||
# Calling again with the same name should return the same AssetInfo id
|
||||
# Calling again with the same name creates a new AssetReference (duplicates allowed)
|
||||
r2 = http.post(f"{api_base}/api/assets/from-hash", json=payload, timeout=120)
|
||||
b2 = r2.json()
|
||||
assert r2.status_code == 201, b2
|
||||
assert b2["id"] == aid
|
||||
assert b2["id"] != aid # new reference, not the same one
|
||||
|
||||
|
||||
def test_get_and_delete_asset(http: requests.Session, api_base: str, seeded_asset: dict):
|
||||
@@ -42,8 +42,8 @@ def test_get_and_delete_asset(http: requests.Session, api_base: str, seeded_asse
|
||||
assert "user_metadata" in detail
|
||||
assert "filename" in detail["user_metadata"]
|
||||
|
||||
# DELETE
|
||||
rd = http.delete(f"{api_base}/api/assets/{aid}", timeout=120)
|
||||
# DELETE (hard delete to also remove underlying asset and file)
|
||||
rd = http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=120)
|
||||
assert rd.status_code == 204
|
||||
|
||||
# GET again -> 404
|
||||
@@ -53,6 +53,35 @@ def test_get_and_delete_asset(http: requests.Session, api_base: str, seeded_asse
|
||||
assert body["error"]["code"] == "ASSET_NOT_FOUND"
|
||||
|
||||
|
||||
def test_soft_delete_hides_from_get(http: requests.Session, api_base: str, seeded_asset: dict):
|
||||
aid = seeded_asset["id"]
|
||||
asset_hash = seeded_asset["asset_hash"]
|
||||
|
||||
# Soft-delete (default, no delete_content param)
|
||||
rd = http.delete(f"{api_base}/api/assets/{aid}", timeout=120)
|
||||
assert rd.status_code == 204
|
||||
|
||||
# GET by reference ID -> 404 (soft-deleted references are hidden)
|
||||
rg = http.get(f"{api_base}/api/assets/{aid}", timeout=120)
|
||||
assert rg.status_code == 404
|
||||
|
||||
# Asset identity is preserved (underlying content still exists)
|
||||
rh = http.head(f"{api_base}/api/assets/hash/{asset_hash}", timeout=120)
|
||||
assert rh.status_code == 200
|
||||
|
||||
# Soft-deleted reference should not appear in listings
|
||||
rl = http.get(
|
||||
f"{api_base}/api/assets",
|
||||
params={"include_tags": "unit-tests", "limit": "500"},
|
||||
timeout=120,
|
||||
)
|
||||
ids = [a["id"] for a in rl.json().get("assets", [])]
|
||||
assert aid not in ids
|
||||
|
||||
# Clean up: hard-delete the soft-deleted reference and orphaned asset
|
||||
http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=120)
|
||||
|
||||
|
||||
def test_delete_upon_reference_count(
|
||||
http: requests.Session, api_base: str, seeded_asset: dict
|
||||
):
|
||||
@@ -70,21 +99,32 @@ def test_delete_upon_reference_count(
|
||||
assert copy["asset_hash"] == src_hash
|
||||
assert copy["created_new"] is False
|
||||
|
||||
# Delete original reference -> asset identity must remain
|
||||
# Soft-delete original reference (default) -> asset identity must remain
|
||||
aid1 = seeded_asset["id"]
|
||||
rd1 = http.delete(f"{api_base}/api/assets/{aid1}", timeout=120)
|
||||
assert rd1.status_code == 204
|
||||
|
||||
rh1 = http.head(f"{api_base}/api/assets/hash/{src_hash}", timeout=120)
|
||||
assert rh1.status_code == 200 # identity still present
|
||||
assert rh1.status_code == 200 # identity still present (second ref exists)
|
||||
|
||||
# Delete the last reference with default semantics -> identity and cached files removed
|
||||
# Soft-delete the last reference -> asset identity preserved (no hard delete)
|
||||
aid2 = copy["id"]
|
||||
rd2 = http.delete(f"{api_base}/api/assets/{aid2}", timeout=120)
|
||||
assert rd2.status_code == 204
|
||||
|
||||
rh2 = http.head(f"{api_base}/api/assets/hash/{src_hash}", timeout=120)
|
||||
assert rh2.status_code == 404 # orphan content removed
|
||||
assert rh2.status_code == 200 # asset identity preserved (soft delete)
|
||||
|
||||
# Re-associate via from-hash, then hard-delete -> orphan content removed
|
||||
r3 = http.post(f"{api_base}/api/assets/from-hash", json=payload, timeout=120)
|
||||
assert r3.status_code == 201, r3.json()
|
||||
aid3 = r3.json()["id"]
|
||||
|
||||
rd3 = http.delete(f"{api_base}/api/assets/{aid3}?delete_content=true", timeout=120)
|
||||
assert rd3.status_code == 204
|
||||
|
||||
rh3 = http.head(f"{api_base}/api/assets/hash/{src_hash}", timeout=120)
|
||||
assert rh3.status_code == 404 # orphan content removed
|
||||
|
||||
|
||||
def test_update_asset_fields(http: requests.Session, api_base: str, seeded_asset: dict):
|
||||
@@ -126,42 +166,52 @@ def test_head_asset_bad_hash_returns_400_and_no_body(http: requests.Session, api
|
||||
assert body == b""
|
||||
|
||||
|
||||
def test_delete_nonexistent_returns_404(http: requests.Session, api_base: str):
|
||||
bogus = str(uuid.uuid4())
|
||||
r = http.delete(f"{api_base}/api/assets/{bogus}", timeout=120)
|
||||
@pytest.mark.parametrize(
|
||||
"method,endpoint_template,payload,expected_status,error_code",
|
||||
[
|
||||
# Delete nonexistent asset
|
||||
("delete", "/api/assets/{uuid}", None, 404, "ASSET_NOT_FOUND"),
|
||||
# Bad hash algorithm in from-hash
|
||||
(
|
||||
"post",
|
||||
"/api/assets/from-hash",
|
||||
{"hash": "sha256:" + "0" * 64, "name": "x.bin", "tags": ["models", "checkpoints", "unit-tests"]},
|
||||
400,
|
||||
"INVALID_BODY",
|
||||
),
|
||||
# Get with bad UUID format
|
||||
("get", "/api/assets/not-a-uuid", None, 404, None),
|
||||
# Get content with bad UUID format
|
||||
("get", "/api/assets/not-a-uuid/content", None, 404, None),
|
||||
],
|
||||
ids=["delete_nonexistent", "bad_hash_algorithm", "get_bad_uuid", "content_bad_uuid"],
|
||||
)
|
||||
def test_error_responses(
|
||||
http: requests.Session, api_base: str, method, endpoint_template, payload, expected_status, error_code
|
||||
):
|
||||
# Replace {uuid} placeholder with a random UUID for delete test
|
||||
endpoint = endpoint_template.replace("{uuid}", str(uuid.uuid4()))
|
||||
url = f"{api_base}{endpoint}"
|
||||
|
||||
if method == "get":
|
||||
r = http.get(url, timeout=120)
|
||||
elif method == "post":
|
||||
r = http.post(url, json=payload, timeout=120)
|
||||
elif method == "delete":
|
||||
r = http.delete(url, timeout=120)
|
||||
|
||||
assert r.status_code == expected_status
|
||||
if error_code:
|
||||
body = r.json()
|
||||
assert body["error"]["code"] == error_code
|
||||
|
||||
|
||||
def test_create_from_hash_invalid_json(http: requests.Session, api_base: str):
|
||||
"""Invalid JSON body requires special handling (data= instead of json=)."""
|
||||
r = http.post(f"{api_base}/api/assets/from-hash", data=b"{not json}", timeout=120)
|
||||
body = r.json()
|
||||
assert r.status_code == 404
|
||||
assert body["error"]["code"] == "ASSET_NOT_FOUND"
|
||||
|
||||
|
||||
def test_create_from_hash_invalids(http: requests.Session, api_base: str):
|
||||
# Bad hash algorithm
|
||||
bad = {
|
||||
"hash": "sha256:" + "0" * 64,
|
||||
"name": "x.bin",
|
||||
"tags": ["models", "checkpoints", "unit-tests"],
|
||||
}
|
||||
r1 = http.post(f"{api_base}/api/assets/from-hash", json=bad, timeout=120)
|
||||
b1 = r1.json()
|
||||
assert r1.status_code == 400
|
||||
assert b1["error"]["code"] == "INVALID_BODY"
|
||||
|
||||
# Invalid JSON body
|
||||
r2 = http.post(f"{api_base}/api/assets/from-hash", data=b"{not json}", timeout=120)
|
||||
b2 = r2.json()
|
||||
assert r2.status_code == 400
|
||||
assert b2["error"]["code"] == "INVALID_JSON"
|
||||
|
||||
|
||||
def test_get_update_download_bad_ids(http: requests.Session, api_base: str):
|
||||
# All endpoints should be not found, as we UUID regex directly in the route definition.
|
||||
bad_id = "not-a-uuid"
|
||||
|
||||
r1 = http.get(f"{api_base}/api/assets/{bad_id}", timeout=120)
|
||||
assert r1.status_code == 404
|
||||
|
||||
r3 = http.get(f"{api_base}/api/assets/{bad_id}/content", timeout=120)
|
||||
assert r3.status_code == 404
|
||||
assert r.status_code == 400
|
||||
assert body["error"]["code"] == "INVALID_JSON"
|
||||
|
||||
|
||||
def test_update_requires_at_least_one_field(http: requests.Session, api_base: str, seeded_asset: dict):
|
||||
|
||||
@@ -6,7 +6,7 @@ from typing import Optional
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from conftest import get_asset_filename, trigger_sync_seed_assets
|
||||
from helpers import get_asset_filename, trigger_sync_seed_assets
|
||||
|
||||
|
||||
def test_download_attachment_and_inline(http: requests.Session, api_base: str, seeded_asset: dict):
|
||||
@@ -117,7 +117,7 @@ def test_download_missing_file_returns_404(
|
||||
assert body["error"]["code"] == "FILE_NOT_FOUND"
|
||||
finally:
|
||||
# We created asset without the "unit-tests" tag(see `autoclean_unit_test_assets`), we need to clear it manually.
|
||||
dr = http.delete(f"{api_base}/api/assets/{aid}", timeout=120)
|
||||
dr = http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=120)
|
||||
dr.content
|
||||
|
||||
|
||||
|
||||
121
tests-unit/assets_test/test_file_utils.py
Normal file
121
tests-unit/assets_test/test_file_utils.py
Normal file
@@ -0,0 +1,121 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
from app.assets.services.file_utils import is_visible, list_files_recursively
|
||||
|
||||
|
||||
class TestIsVisible:
|
||||
def test_visible_file(self):
|
||||
assert is_visible("file.txt") is True
|
||||
|
||||
def test_hidden_file(self):
|
||||
assert is_visible(".hidden") is False
|
||||
|
||||
def test_hidden_directory(self):
|
||||
assert is_visible(".git") is False
|
||||
|
||||
def test_visible_directory(self):
|
||||
assert is_visible("src") is True
|
||||
|
||||
def test_dotdot_is_hidden(self):
|
||||
assert is_visible("..") is False
|
||||
|
||||
def test_dot_is_hidden(self):
|
||||
assert is_visible(".") is False
|
||||
|
||||
|
||||
class TestListFilesRecursively:
|
||||
def test_skips_hidden_files(self, tmp_path):
|
||||
(tmp_path / "visible.txt").write_text("a")
|
||||
(tmp_path / ".hidden").write_text("b")
|
||||
|
||||
result = list_files_recursively(str(tmp_path))
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0].endswith("visible.txt")
|
||||
|
||||
def test_skips_hidden_directories(self, tmp_path):
|
||||
hidden_dir = tmp_path / ".hidden_dir"
|
||||
hidden_dir.mkdir()
|
||||
(hidden_dir / "file.txt").write_text("a")
|
||||
|
||||
visible_dir = tmp_path / "visible_dir"
|
||||
visible_dir.mkdir()
|
||||
(visible_dir / "file.txt").write_text("b")
|
||||
|
||||
result = list_files_recursively(str(tmp_path))
|
||||
|
||||
assert len(result) == 1
|
||||
assert "visible_dir" in result[0]
|
||||
assert ".hidden_dir" not in result[0]
|
||||
|
||||
def test_empty_directory(self, tmp_path):
|
||||
result = list_files_recursively(str(tmp_path))
|
||||
assert result == []
|
||||
|
||||
def test_nonexistent_directory(self, tmp_path):
|
||||
result = list_files_recursively(str(tmp_path / "nonexistent"))
|
||||
assert result == []
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="symlinks need privileges on Windows")
|
||||
def test_follows_symlinked_directories(self, tmp_path):
|
||||
target = tmp_path / "real_dir"
|
||||
target.mkdir()
|
||||
(target / "model.safetensors").write_text("data")
|
||||
|
||||
root = tmp_path / "root"
|
||||
root.mkdir()
|
||||
(root / "link").symlink_to(target)
|
||||
|
||||
result = list_files_recursively(str(root))
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0].endswith("model.safetensors")
|
||||
assert "link" in result[0]
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="symlinks need privileges on Windows")
|
||||
def test_follows_symlinked_files(self, tmp_path):
|
||||
real_file = tmp_path / "real.txt"
|
||||
real_file.write_text("content")
|
||||
|
||||
root = tmp_path / "root"
|
||||
root.mkdir()
|
||||
(root / "link.txt").symlink_to(real_file)
|
||||
|
||||
result = list_files_recursively(str(root))
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0].endswith("link.txt")
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="symlinks need privileges on Windows")
|
||||
def test_circular_symlinks_do_not_loop(self, tmp_path):
|
||||
dir_a = tmp_path / "a"
|
||||
dir_a.mkdir()
|
||||
(dir_a / "file.txt").write_text("a")
|
||||
# a/b -> a (circular)
|
||||
(dir_a / "b").symlink_to(dir_a)
|
||||
|
||||
result = list_files_recursively(str(dir_a))
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0].endswith("file.txt")
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="symlinks need privileges on Windows")
|
||||
def test_mutual_circular_symlinks(self, tmp_path):
|
||||
dir_a = tmp_path / "a"
|
||||
dir_b = tmp_path / "b"
|
||||
dir_a.mkdir()
|
||||
dir_b.mkdir()
|
||||
(dir_a / "file_a.txt").write_text("a")
|
||||
(dir_b / "file_b.txt").write_text("b")
|
||||
# a/link_b -> b and b/link_a -> a
|
||||
(dir_a / "link_b").symlink_to(dir_b)
|
||||
(dir_b / "link_a").symlink_to(dir_a)
|
||||
|
||||
result = list_files_recursively(str(dir_a))
|
||||
basenames = sorted(os.path.basename(p) for p in result)
|
||||
|
||||
assert "file_a.txt" in basenames
|
||||
assert "file_b.txt" in basenames
|
||||
@@ -1,6 +1,7 @@
|
||||
import time
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
|
||||
@@ -283,30 +284,21 @@ def test_list_assets_offset_beyond_total_and_limit_boundary(http, api_base, asse
|
||||
assert b2["has_more"] is False
|
||||
|
||||
|
||||
def test_list_assets_offset_negative_and_limit_nonint_rejected(http, api_base):
|
||||
r1 = http.get(api_base + "/api/assets", params={"offset": "-1"}, timeout=120)
|
||||
b1 = r1.json()
|
||||
assert r1.status_code == 400
|
||||
assert b1["error"]["code"] == "INVALID_QUERY"
|
||||
|
||||
r2 = http.get(api_base + "/api/assets", params={"limit": "abc"}, timeout=120)
|
||||
b2 = r2.json()
|
||||
assert r2.status_code == 400
|
||||
assert b2["error"]["code"] == "INVALID_QUERY"
|
||||
|
||||
|
||||
def test_list_assets_invalid_query_rejected(http: requests.Session, api_base: str):
|
||||
# limit too small
|
||||
r1 = http.get(api_base + "/api/assets", params={"limit": "0"}, timeout=120)
|
||||
b1 = r1.json()
|
||||
assert r1.status_code == 400
|
||||
assert b1["error"]["code"] == "INVALID_QUERY"
|
||||
|
||||
# bad metadata JSON
|
||||
r2 = http.get(api_base + "/api/assets", params={"metadata_filter": "{not json"}, timeout=120)
|
||||
b2 = r2.json()
|
||||
assert r2.status_code == 400
|
||||
assert b2["error"]["code"] == "INVALID_QUERY"
|
||||
@pytest.mark.parametrize(
|
||||
"params,error_code",
|
||||
[
|
||||
({"offset": "-1"}, "INVALID_QUERY"),
|
||||
({"limit": "abc"}, "INVALID_QUERY"),
|
||||
({"limit": "0"}, "INVALID_QUERY"),
|
||||
({"metadata_filter": "{not json"}, "INVALID_QUERY"),
|
||||
],
|
||||
ids=["negative_offset", "non_int_limit", "zero_limit", "invalid_metadata_json"],
|
||||
)
|
||||
def test_list_assets_invalid_query_rejected(http: requests.Session, api_base: str, params, error_code):
|
||||
r = http.get(api_base + "/api/assets", params=params, timeout=120)
|
||||
body = r.json()
|
||||
assert r.status_code == 400
|
||||
assert body["error"]["code"] == error_code
|
||||
|
||||
|
||||
def test_list_assets_name_contains_literal_underscore(
|
||||
|
||||
@@ -3,7 +3,7 @@ from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from conftest import get_asset_filename, trigger_sync_seed_assets
|
||||
from helpers import get_asset_filename, trigger_sync_seed_assets
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
||||
482
tests-unit/assets_test/test_sync_references.py
Normal file
482
tests-unit/assets_test/test_sync_references.py
Normal file
@@ -0,0 +1,482 @@
|
||||
"""Tests for sync_references_with_filesystem in scanner.py."""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.models import (
|
||||
Asset,
|
||||
AssetReference,
|
||||
AssetReferenceTag,
|
||||
Base,
|
||||
Tag,
|
||||
)
|
||||
from app.assets.database.queries.asset_reference import (
|
||||
bulk_insert_references_ignore_conflicts,
|
||||
get_references_for_prefixes,
|
||||
get_unenriched_references,
|
||||
restore_references_by_paths,
|
||||
)
|
||||
from app.assets.scanner import sync_references_with_filesystem
|
||||
from app.assets.services.file_utils import get_mtime_ns
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def db_engine():
|
||||
engine = create_engine("sqlite:///:memory:")
|
||||
Base.metadata.create_all(engine)
|
||||
return engine
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def session(db_engine):
|
||||
with Session(db_engine) as sess:
|
||||
yield sess
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def temp_dir():
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
yield Path(tmpdir)
|
||||
|
||||
|
||||
def _create_file(temp_dir: Path, name: str, content: bytes = b"\x00" * 100) -> str:
|
||||
"""Create a file and return its absolute path (no symlink resolution)."""
|
||||
p = temp_dir / name
|
||||
p.parent.mkdir(parents=True, exist_ok=True)
|
||||
p.write_bytes(content)
|
||||
return os.path.abspath(str(p))
|
||||
|
||||
|
||||
def _stat_mtime_ns(path: str) -> int:
|
||||
return get_mtime_ns(os.stat(path, follow_symlinks=True))
|
||||
|
||||
|
||||
def _make_asset(
|
||||
session: Session,
|
||||
asset_id: str,
|
||||
file_path: str,
|
||||
ref_id: str,
|
||||
*,
|
||||
asset_hash: str | None = None,
|
||||
size_bytes: int = 100,
|
||||
mtime_ns: int | None = None,
|
||||
needs_verify: bool = False,
|
||||
is_missing: bool = False,
|
||||
) -> tuple[Asset, AssetReference]:
|
||||
"""Insert an Asset + AssetReference and flush."""
|
||||
asset = session.get(Asset, asset_id)
|
||||
if asset is None:
|
||||
asset = Asset(id=asset_id, hash=asset_hash, size_bytes=size_bytes)
|
||||
session.add(asset)
|
||||
session.flush()
|
||||
|
||||
ref = AssetReference(
|
||||
id=ref_id,
|
||||
asset_id=asset_id,
|
||||
name=f"test-{ref_id}",
|
||||
owner_id="system",
|
||||
file_path=file_path,
|
||||
mtime_ns=mtime_ns,
|
||||
needs_verify=needs_verify,
|
||||
is_missing=is_missing,
|
||||
)
|
||||
session.add(ref)
|
||||
session.flush()
|
||||
return asset, ref
|
||||
|
||||
|
||||
def _ensure_missing_tag(session: Session):
|
||||
"""Ensure the 'missing' tag exists."""
|
||||
if not session.get(Tag, "missing"):
|
||||
session.add(Tag(name="missing", tag_type="system"))
|
||||
session.flush()
|
||||
|
||||
|
||||
class _VerifyCase:
|
||||
def __init__(self, id, stat_unchanged, needs_verify_before, expect_needs_verify):
|
||||
self.id = id
|
||||
self.stat_unchanged = stat_unchanged
|
||||
self.needs_verify_before = needs_verify_before
|
||||
self.expect_needs_verify = expect_needs_verify
|
||||
|
||||
|
||||
VERIFY_CASES = [
|
||||
_VerifyCase(
|
||||
id="unchanged_clears_verify",
|
||||
stat_unchanged=True,
|
||||
needs_verify_before=True,
|
||||
expect_needs_verify=False,
|
||||
),
|
||||
_VerifyCase(
|
||||
id="unchanged_keeps_clear",
|
||||
stat_unchanged=True,
|
||||
needs_verify_before=False,
|
||||
expect_needs_verify=False,
|
||||
),
|
||||
_VerifyCase(
|
||||
id="changed_sets_verify",
|
||||
stat_unchanged=False,
|
||||
needs_verify_before=False,
|
||||
expect_needs_verify=True,
|
||||
),
|
||||
_VerifyCase(
|
||||
id="changed_keeps_verify",
|
||||
stat_unchanged=False,
|
||||
needs_verify_before=True,
|
||||
expect_needs_verify=True,
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case", VERIFY_CASES, ids=lambda c: c.id)
|
||||
def test_needs_verify_toggling(session, temp_dir, case):
|
||||
"""needs_verify is set/cleared based on mtime+size match."""
|
||||
fp = _create_file(temp_dir, "model.bin")
|
||||
real_mtime = _stat_mtime_ns(fp)
|
||||
|
||||
mtime_for_db = real_mtime if case.stat_unchanged else real_mtime + 1
|
||||
_make_asset(
|
||||
session, "a1", fp, "r1",
|
||||
asset_hash="blake3:abc",
|
||||
mtime_ns=mtime_for_db,
|
||||
needs_verify=case.needs_verify_before,
|
||||
)
|
||||
session.commit()
|
||||
|
||||
with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
|
||||
sync_references_with_filesystem(session, "models")
|
||||
session.commit()
|
||||
|
||||
session.expire_all()
|
||||
ref = session.get(AssetReference, "r1")
|
||||
assert ref.needs_verify is case.expect_needs_verify
|
||||
|
||||
|
||||
class _MissingCase:
|
||||
def __init__(self, id, file_exists, expect_is_missing):
|
||||
self.id = id
|
||||
self.file_exists = file_exists
|
||||
self.expect_is_missing = expect_is_missing
|
||||
|
||||
|
||||
MISSING_CASES = [
|
||||
_MissingCase(id="existing_file_not_missing", file_exists=True, expect_is_missing=False),
|
||||
_MissingCase(id="missing_file_marked_missing", file_exists=False, expect_is_missing=True),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case", MISSING_CASES, ids=lambda c: c.id)
|
||||
def test_is_missing_flag(session, temp_dir, case):
|
||||
"""is_missing reflects whether the file exists on disk."""
|
||||
if case.file_exists:
|
||||
fp = _create_file(temp_dir, "model.bin")
|
||||
mtime = _stat_mtime_ns(fp)
|
||||
else:
|
||||
fp = str(temp_dir / "gone.bin")
|
||||
mtime = 999
|
||||
|
||||
_make_asset(session, "a1", fp, "r1", asset_hash="blake3:abc", mtime_ns=mtime)
|
||||
session.commit()
|
||||
|
||||
with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
|
||||
sync_references_with_filesystem(session, "models")
|
||||
session.commit()
|
||||
|
||||
session.expire_all()
|
||||
ref = session.get(AssetReference, "r1")
|
||||
assert ref.is_missing is case.expect_is_missing
|
||||
|
||||
|
||||
def test_seed_asset_all_missing_deletes_asset(session, temp_dir):
|
||||
"""Seed asset with all refs missing gets deleted entirely."""
|
||||
fp = str(temp_dir / "gone.bin")
|
||||
_make_asset(session, "seed1", fp, "r1", asset_hash=None, mtime_ns=999)
|
||||
session.commit()
|
||||
|
||||
with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
|
||||
sync_references_with_filesystem(session, "models")
|
||||
session.commit()
|
||||
|
||||
assert session.get(Asset, "seed1") is None
|
||||
assert session.get(AssetReference, "r1") is None
|
||||
|
||||
|
||||
def test_seed_asset_some_exist_returns_survivors(session, temp_dir):
|
||||
"""Seed asset with at least one existing ref survives and is returned."""
|
||||
fp = _create_file(temp_dir, "model.bin")
|
||||
mtime = _stat_mtime_ns(fp)
|
||||
_make_asset(session, "seed1", fp, "r1", asset_hash=None, mtime_ns=mtime)
|
||||
session.commit()
|
||||
|
||||
with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
|
||||
survivors = sync_references_with_filesystem(
|
||||
session, "models", collect_existing_paths=True,
|
||||
)
|
||||
session.commit()
|
||||
|
||||
assert session.get(Asset, "seed1") is not None
|
||||
assert os.path.abspath(fp) in survivors
|
||||
|
||||
|
||||
def test_hashed_asset_prunes_missing_refs_when_one_is_ok(session, temp_dir):
    """Hashed asset with one stat-unchanged ref deletes missing refs."""
    good_path = _create_file(temp_dir, "good.bin")
    absent_path = str(temp_dir / "gone.bin")

    _make_asset(
        session, "h1", good_path, "r_ok",
        asset_hash="blake3:aaa", mtime_ns=_stat_mtime_ns(good_path),
    )
    # Attach a second reference to the same asset whose file does not exist.
    session.add(AssetReference(
        id="r_gone", asset_id="h1", name="gone",
        owner_id="system", file_path=absent_path, mtime_ns=999,
    ))
    session.commit()

    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
        sync_references_with_filesystem(session, "models")
    session.commit()

    session.expire_all()
    # Healthy reference kept; the one pointing at a missing file pruned.
    assert session.get(AssetReference, "r_ok") is not None
    assert session.get(AssetReference, "r_gone") is None
|
||||
|
||||
|
||||
def test_hashed_asset_all_missing_keeps_refs(session, temp_dir):
    """Hashed asset with all refs missing keeps refs (no pruning)."""
    absent_path = str(temp_dir / "gone.bin")
    _make_asset(session, "h1", absent_path, "r1", asset_hash="blake3:aaa", mtime_ns=999)
    session.commit()

    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
        sync_references_with_filesystem(session, "models")
    session.commit()

    session.expire_all()
    # The reference is retained but flagged as missing.
    ref = session.get(AssetReference, "r1")
    assert ref is not None
    assert ref.is_missing is True
|
||||
|
||||
|
||||
def test_missing_tag_added_when_all_refs_gone(session, temp_dir):
    """Missing tag is added to hashed asset when all refs are missing."""
    _ensure_missing_tag(session)
    absent_path = str(temp_dir / "gone.bin")
    _make_asset(session, "h1", absent_path, "r1", asset_hash="blake3:aaa", mtime_ns=999)
    session.commit()

    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
        sync_references_with_filesystem(
            session, "models", update_missing_tags=True,
        )
    session.commit()

    session.expire_all()
    # Sync attached the automatic "missing" tag to the orphaned reference.
    assert session.get(AssetReferenceTag, ("r1", "missing")) is not None
|
||||
|
||||
|
||||
def test_missing_tag_removed_when_ref_ok(session, temp_dir):
    """Missing tag is removed from hashed asset when a ref is stat-unchanged."""
    _ensure_missing_tag(session)
    real_path = _create_file(temp_dir, "model.bin")
    _make_asset(
        session, "h1", real_path, "r1",
        asset_hash="blake3:aaa", mtime_ns=_stat_mtime_ns(real_path),
    )
    # Pre-add a stale missing tag that the sync should clear.
    session.add(AssetReferenceTag(
        asset_reference_id="r1", tag_name="missing", origin="automatic",
    ))
    session.commit()

    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
        sync_references_with_filesystem(
            session, "models", update_missing_tags=True,
        )
    session.commit()

    session.expire_all()
    assert session.get(AssetReferenceTag, ("r1", "missing")) is None
|
||||
|
||||
|
||||
def test_missing_tags_not_touched_when_flag_false(session, temp_dir):
    """Missing tags are not modified when update_missing_tags=False."""
    _ensure_missing_tag(session)
    absent_path = str(temp_dir / "gone.bin")
    _make_asset(session, "h1", absent_path, "r1", asset_hash="blake3:aaa", mtime_ns=999)
    session.commit()

    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
        sync_references_with_filesystem(
            session, "models", update_missing_tags=False,
        )
    session.commit()

    # The tag was never added in the first place.
    assert session.get(AssetReferenceTag, ("r1", "missing")) is None
|
||||
|
||||
|
||||
def test_returns_none_when_collect_false(session, temp_dir):
    """Sync returns None when path collection is disabled."""
    real_path = _create_file(temp_dir, "model.bin")
    _make_asset(
        session, "a1", real_path, "r1",
        asset_hash="blake3:abc", mtime_ns=_stat_mtime_ns(real_path),
    )
    session.commit()

    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
        outcome = sync_references_with_filesystem(
            session, "models", collect_existing_paths=False,
        )

    assert outcome is None
|
||||
|
||||
|
||||
def test_returns_empty_set_for_no_prefixes(session):
    """With no configured prefixes the survivor set is empty."""
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[]):
        outcome = sync_references_with_filesystem(
            session, "models", collect_existing_paths=True,
        )

    assert outcome == set()
|
||||
|
||||
|
||||
def test_no_references_is_noop(session, temp_dir):
    """No crash and no side effects when there are no references."""
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
        outcome = sync_references_with_filesystem(
            session, "models", collect_existing_paths=True,
        )
    session.commit()

    assert outcome == set()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Soft-delete persistence across scanner operations
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _soft_delete_ref(session: Session, ref_id: str) -> None:
    """Mark a reference as soft-deleted (mimics the API DELETE behaviour)."""
    # Setting deleted_at is the only marker the API uses for soft deletion.
    session.get(AssetReference, ref_id).deleted_at = datetime(2025, 1, 1)
    session.flush()
|
||||
|
||||
|
||||
def test_soft_deleted_ref_excluded_from_get_references_for_prefixes(session, temp_dir):
    """get_references_for_prefixes skips soft-deleted references."""
    real_path = _create_file(temp_dir, "model.bin")
    _make_asset(
        session, "a1", real_path, "r1",
        asset_hash="blake3:abc", mtime_ns=_stat_mtime_ns(real_path),
    )
    _soft_delete_ref(session, "r1")
    session.commit()

    rows = get_references_for_prefixes(session, [str(temp_dir)], include_missing=True)
    assert len(rows) == 0
|
||||
|
||||
|
||||
def test_sync_does_not_resurrect_soft_deleted_ref(session, temp_dir):
    """Scanner sync leaves soft-deleted refs untouched even when file exists on disk."""
    real_path = _create_file(temp_dir, "model.bin")
    _make_asset(
        session, "a1", real_path, "r1",
        asset_hash="blake3:abc", mtime_ns=_stat_mtime_ns(real_path),
    )
    _soft_delete_ref(session, "r1")
    session.commit()

    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
        sync_references_with_filesystem(session, "models")
    session.commit()

    session.expire_all()
    ref = session.get(AssetReference, "r1")
    assert ref.deleted_at is not None, "soft-deleted ref must stay deleted after sync"
|
||||
|
||||
|
||||
def test_bulk_insert_does_not_overwrite_soft_deleted_ref(session, temp_dir):
    """bulk_insert_references_ignore_conflicts cannot replace a soft-deleted row."""
    real_path = _create_file(temp_dir, "model.bin")
    stat_mtime = _stat_mtime_ns(real_path)
    _make_asset(session, "a1", real_path, "r1", asset_hash="blake3:abc", mtime_ns=stat_mtime)
    _soft_delete_ref(session, "r1")
    session.commit()

    now = datetime.now(tz=None)
    # Try to insert another reference for the same file_path; the conflict
    # must be ignored, leaving the soft-deleted original in place.
    bulk_insert_references_ignore_conflicts(session, [
        {
            "id": "r_new",
            "asset_id": "a1",
            "file_path": real_path,
            "name": "model.bin",
            "owner_id": "",
            "mtime_ns": stat_mtime,
            "preview_id": None,
            "user_metadata": None,
            "created_at": now,
            "updated_at": now,
            "last_access_time": now,
        }
    ])
    session.commit()

    session.expire_all()
    # Original row is still the soft-deleted one.
    ref = session.get(AssetReference, "r1")
    assert ref is not None
    assert ref.deleted_at is not None
    # The new row was not inserted (conflict on file_path).
    assert session.get(AssetReference, "r_new") is None
|
||||
|
||||
|
||||
def test_restore_references_by_paths_skips_soft_deleted(session, temp_dir):
    """restore_references_by_paths does not clear is_missing on soft-deleted refs."""
    real_path = _create_file(temp_dir, "model.bin")
    _make_asset(
        session, "a1", real_path, "r1",
        asset_hash="blake3:abc", mtime_ns=_stat_mtime_ns(real_path), is_missing=True,
    )
    _soft_delete_ref(session, "r1")
    session.commit()

    restored = restore_references_by_paths(session, [real_path])
    session.commit()

    # Nothing was restored and the soft-deleted row kept its flags.
    assert restored == 0
    session.expire_all()
    ref = session.get(AssetReference, "r1")
    assert ref.is_missing is True, "is_missing must not be cleared on soft-deleted ref"
    assert ref.deleted_at is not None
|
||||
|
||||
|
||||
def test_get_unenriched_references_excludes_soft_deleted(session, temp_dir):
    """Enrichment queries do not pick up soft-deleted references."""
    real_path = _create_file(temp_dir, "model.bin")
    _make_asset(
        session, "a1", real_path, "r1",
        asset_hash="blake3:abc", mtime_ns=_stat_mtime_ns(real_path),
    )
    _soft_delete_ref(session, "r1")
    session.commit()

    rows = get_unenriched_references(session, [str(temp_dir)], max_level=2)
    assert len(rows) == 0
|
||||
|
||||
|
||||
def test_sync_ignores_soft_deleted_seed_asset(session, temp_dir):
    """Soft-deleted seed ref is not garbage-collected even when file is missing."""
    absent_path = str(temp_dir / "gone.bin")  # file does not exist
    _make_asset(session, "seed1", absent_path, "r1", asset_hash=None, mtime_ns=999)
    _soft_delete_ref(session, "r1")
    session.commit()

    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
        sync_references_with_filesystem(session, "models")
    session.commit()

    session.expire_all()
    # Asset and ref must still exist — scanner did not see the soft-deleted row.
    assert session.get(Asset, "seed1") is not None
    assert session.get(AssetReference, "r1") is not None
|
||||
@@ -69,8 +69,8 @@ def test_tags_empty_usage(http: requests.Session, api_base: str, asset_factory,
|
||||
used_names = [t["name"] for t in body2["tags"]]
|
||||
assert custom_tag in used_names
|
||||
|
||||
# Delete the asset so the tag usage drops to zero
|
||||
rd = http.delete(f"{api_base}/api/assets/{_asset['id']}", timeout=120)
|
||||
# Hard-delete the asset so the tag usage drops to zero
|
||||
rd = http.delete(f"{api_base}/api/assets/{_asset['id']}?delete_content=true", timeout=120)
|
||||
assert rd.status_code == 204
|
||||
|
||||
# Now the custom tag must not be returned when include_zero=false
|
||||
@@ -18,25 +18,25 @@ def test_upload_ok_duplicate_reference(http: requests.Session, api_base: str, ma
|
||||
assert r1.status_code == 201, a1
|
||||
assert a1["created_new"] is True
|
||||
|
||||
# Second upload with the same data and name should return created_new == False and the same asset
|
||||
# Second upload with the same data and name creates a new AssetReference (duplicates allowed)
|
||||
# Returns 200 because Asset already exists, but a new AssetReference is created
|
||||
files = {"file": (name, data, "application/octet-stream")}
|
||||
form = {"tags": json.dumps(tags), "name": name, "user_metadata": json.dumps(meta)}
|
||||
r2 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
|
||||
a2 = r2.json()
|
||||
assert r2.status_code == 200, a2
|
||||
assert a2["created_new"] is False
|
||||
assert r2.status_code in (200, 201), a2
|
||||
assert a2["asset_hash"] == a1["asset_hash"]
|
||||
assert a2["id"] == a1["id"] # old reference
|
||||
assert a2["id"] != a1["id"] # new reference with same content
|
||||
|
||||
# Third upload with the same data but new name should return created_new == False and the new AssetReference
|
||||
# Third upload with the same data but different name also creates new AssetReference
|
||||
files = {"file": (name, data, "application/octet-stream")}
|
||||
form = {"tags": json.dumps(tags), "name": name + "_d", "user_metadata": json.dumps(meta)}
|
||||
r2 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
|
||||
a3 = r2.json()
|
||||
assert r2.status_code == 200, a3
|
||||
assert a3["created_new"] is False
|
||||
r3 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
|
||||
a3 = r3.json()
|
||||
assert r3.status_code in (200, 201), a3
|
||||
assert a3["asset_hash"] == a1["asset_hash"]
|
||||
assert a3["id"] != a1["id"] # old reference
|
||||
assert a3["id"] != a1["id"]
|
||||
assert a3["id"] != a2["id"]
|
||||
|
||||
|
||||
def test_upload_fastpath_from_existing_hash_no_file(http: requests.Session, api_base: str):
|
||||
@@ -116,7 +116,7 @@ def test_concurrent_upload_identical_bytes_different_names(
|
||||
):
|
||||
"""
|
||||
Two concurrent uploads of identical bytes but different names.
|
||||
Expect a single Asset (same hash), two AssetInfo rows, and exactly one created_new=True.
|
||||
Expect a single Asset (same hash), two AssetReference rows, and exactly one created_new=True.
|
||||
"""
|
||||
scope = f"concupload-{uuid.uuid4().hex[:6]}"
|
||||
name1, name2 = "cu_a.bin", "cu_b.bin"
|
||||
|
||||
@@ -2,4 +2,3 @@ pytest>=7.8.0
|
||||
pytest-aiohttp
|
||||
pytest-asyncio
|
||||
websocket-client
|
||||
blake3
|
||||
|
||||
900
tests-unit/seeder_test/test_seeder.py
Normal file
900
tests-unit/seeder_test/test_seeder.py
Normal file
@@ -0,0 +1,900 @@
|
||||
"""Unit tests for the _AssetSeeder background scanning class."""
|
||||
|
||||
import threading
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from app.assets.database.queries.asset_reference import UnenrichedReferenceRow
|
||||
from app.assets.seeder import _AssetSeeder, Progress, ScanInProgressError, ScanPhase, State
|
||||
|
||||
|
||||
@pytest.fixture
def fresh_seeder():
    """Yield a brand-new _AssetSeeder and stop its worker thread afterwards."""
    instance = _AssetSeeder()
    try:
        yield instance
    finally:
        # Always join the background thread so tests cannot leak workers.
        instance.shutdown(timeout=1.0)
|
||||
|
||||
|
||||
@pytest.fixture
def mock_dependencies():
    """Mock all external dependencies for isolated testing.

    Stubs every collaborator used by the seeder module so a scan completes
    immediately without touching the database or the filesystem. Individual
    tests re-patch specific names (e.g. collect_paths_for_roots) on top of
    this baseline when they need to block or observe a phase.
    """
    with (
        patch("app.assets.seeder.dependencies_available", return_value=True),
        patch("app.assets.seeder.sync_root_safely", return_value=set()),
        patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
        # build_asset_specs returns (specs, tag_names, skipped_count) — keep all empty.
        patch("app.assets.seeder.build_asset_specs", return_value=([], set(), 0)),
        patch("app.assets.seeder.insert_asset_specs", return_value=0),
        patch("app.assets.seeder.get_unenriched_assets_for_roots", return_value=[]),
        patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
    ):
        yield
|
||||
|
||||
class TestSeederStateTransitions:
    """Test state machine transitions.

    The tests use a pair of threading.Events to freeze the worker thread at a
    known point (inside collect_paths_for_roots): ``reached`` tells the test
    the worker arrived, ``barrier`` releases it again.
    """

    def test_initial_state_is_idle(self, fresh_seeder: _AssetSeeder):
        assert fresh_seeder.get_status().state == State.IDLE

    def test_start_transitions_to_running(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        barrier = threading.Event()
        reached = threading.Event()

        def slow_collect(*args):
            # Signal that the scan thread is running, then block until released.
            reached.set()
            barrier.wait(timeout=5.0)
            return []

        with patch(
            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
        ):
            started = fresh_seeder.start(roots=("models",))
            assert started is True
            assert reached.wait(timeout=2.0)
            assert fresh_seeder.get_status().state == State.RUNNING

            barrier.set()

    def test_start_while_running_returns_false(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        barrier = threading.Event()
        reached = threading.Event()

        def slow_collect(*args):
            reached.set()
            barrier.wait(timeout=5.0)
            return []

        with patch(
            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
        ):
            fresh_seeder.start(roots=("models",))
            assert reached.wait(timeout=2.0)

            # A second start while the first scan is in flight must be rejected.
            second_start = fresh_seeder.start(roots=("models",))
            assert second_start is False

            barrier.set()

    def test_cancel_transitions_to_cancelling(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        barrier = threading.Event()
        reached = threading.Event()

        def slow_collect(*args):
            reached.set()
            barrier.wait(timeout=5.0)
            return []

        with patch(
            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
        ):
            fresh_seeder.start(roots=("models",))
            assert reached.wait(timeout=2.0)

            # Cancel while the worker is blocked: state flips to CANCELLING
            # immediately, before the worker observes the flag.
            cancelled = fresh_seeder.cancel()
            assert cancelled is True
            assert fresh_seeder.get_status().state == State.CANCELLING

            barrier.set()

    def test_cancel_when_idle_returns_false(self, fresh_seeder: _AssetSeeder):
        cancelled = fresh_seeder.cancel()
        assert cancelled is False

    def test_state_returns_to_idle_after_completion(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        fresh_seeder.start(roots=("models",))
        completed = fresh_seeder.wait(timeout=5.0)
        assert completed is True
        assert fresh_seeder.get_status().state == State.IDLE
|
||||
|
||||
|
||||
class TestSeederWait:
    """Test wait() behavior."""

    def test_wait_blocks_until_complete(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        fresh_seeder.start(roots=("models",))
        completed = fresh_seeder.wait(timeout=5.0)
        assert completed is True
        assert fresh_seeder.get_status().state == State.IDLE

    def test_wait_returns_false_on_timeout(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        barrier = threading.Event()

        def slow_collect(*args):
            # Keep the scan thread blocked far longer than the wait() timeout.
            barrier.wait(timeout=10.0)
            return []

        with patch(
            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
        ):
            fresh_seeder.start(roots=("models",))
            completed = fresh_seeder.wait(timeout=0.1)
            assert completed is False

            barrier.set()

    def test_wait_when_idle_returns_true(self, fresh_seeder: _AssetSeeder):
        # With nothing running, wait() returns immediately with success.
        completed = fresh_seeder.wait(timeout=1.0)
        assert completed is True
|
||||
|
||||
|
||||
class TestSeederProgress:
    """Test progress tracking."""

    def test_get_status_returns_progress_during_scan(
        self, fresh_seeder: _AssetSeeder
    ):
        barrier = threading.Event()
        reached = threading.Event()

        def slow_build(*args, **kwargs):
            # Freeze the worker after path collection, so progress.total is set.
            reached.set()
            barrier.wait(timeout=5.0)
            return ([], set(), 0)

        paths = ["/path/file1.safetensors", "/path/file2.safetensors"]

        with (
            patch("app.assets.seeder.dependencies_available", return_value=True),
            patch("app.assets.seeder.sync_root_safely", return_value=set()),
            patch("app.assets.seeder.collect_paths_for_roots", return_value=paths),
            patch("app.assets.seeder.build_asset_specs", side_effect=slow_build),
            patch("app.assets.seeder.insert_asset_specs", return_value=0),
            patch("app.assets.seeder.get_unenriched_assets_for_roots", return_value=[]),
            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
        ):
            fresh_seeder.start(roots=("models",))
            assert reached.wait(timeout=2.0)

            status = fresh_seeder.get_status()
            assert status.state == State.RUNNING
            assert status.progress is not None
            # total reflects the two collected paths.
            assert status.progress.total == 2

            barrier.set()

    def test_progress_callback_is_invoked(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        progress_updates: list[Progress] = []

        def callback(p: Progress):
            progress_updates.append(p)

        with patch(
            "app.assets.seeder.collect_paths_for_roots",
            return_value=[f"/path/file{i}.safetensors" for i in range(10)],
        ):
            fresh_seeder.start(roots=("models",), progress_callback=callback)
            fresh_seeder.wait(timeout=5.0)

        # At least one progress update must have been delivered.
        assert len(progress_updates) > 0
|
||||
|
||||
|
||||
class TestSeederCancellation:
    """Test cancellation behavior."""

    def test_scan_commits_partial_progress_on_cancellation(
        self, fresh_seeder: _AssetSeeder
    ):
        insert_count = 0
        barrier = threading.Event()
        first_insert_done = threading.Event()

        def slow_insert(specs, tags):
            # Count insert batches; let the first through, block from the second
            # onwards so the test can cancel mid-scan deterministically.
            nonlocal insert_count
            insert_count += 1
            if insert_count == 1:
                first_insert_done.set()
            if insert_count >= 2:
                barrier.wait(timeout=5.0)
            return len(specs)

        paths = [f"/path/file{i}.safetensors" for i in range(1500)]
        specs = [
            {
                "abs_path": p,
                "size_bytes": 100,
                "mtime_ns": 0,
                "info_name": f"file{i}",
                "tags": [],
                "fname": f"file{i}",
                "metadata": None,
                "hash": None,
                "mime_type": None,
            }
            for i, p in enumerate(paths)
        ]

        with (
            patch("app.assets.seeder.dependencies_available", return_value=True),
            patch("app.assets.seeder.sync_root_safely", return_value=set()),
            patch("app.assets.seeder.collect_paths_for_roots", return_value=paths),
            patch(
                "app.assets.seeder.build_asset_specs", return_value=(specs, set(), 0)
            ),
            patch("app.assets.seeder.insert_asset_specs", side_effect=slow_insert),
            patch("app.assets.seeder.get_unenriched_assets_for_roots", return_value=[]),
            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
        ):
            fresh_seeder.start(roots=("models",))
            assert first_insert_done.wait(timeout=2.0)

            fresh_seeder.cancel()
            barrier.set()
            fresh_seeder.wait(timeout=5.0)

        assert 1 <= insert_count < 3  # 1500 paths / 500 batch = 3; cancel stopped early
|
||||
|
||||
|
||||
class TestSeederErrorHandling:
    """Test error handling behavior."""

    def test_database_errors_captured_in_status(self, fresh_seeder: _AssetSeeder):
        # A failing insert must not crash the worker; the error message is
        # surfaced through get_status().errors instead.
        with (
            patch("app.assets.seeder.dependencies_available", return_value=True),
            patch("app.assets.seeder.sync_root_safely", return_value=set()),
            patch(
                "app.assets.seeder.collect_paths_for_roots",
                return_value=["/path/file.safetensors"],
            ),
            patch(
                "app.assets.seeder.build_asset_specs",
                return_value=(
                    [
                        {
                            "abs_path": "/path/file.safetensors",
                            "size_bytes": 100,
                            "mtime_ns": 0,
                            "info_name": "file",
                            "tags": [],
                            "fname": "file",
                            "metadata": None,
                            "hash": None,
                            "mime_type": None,
                        }
                    ],
                    set(),
                    0,
                ),
            ),
            patch(
                "app.assets.seeder.insert_asset_specs",
                side_effect=Exception("DB connection failed"),
            ),
            patch("app.assets.seeder.get_unenriched_assets_for_roots", return_value=[]),
            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
        ):
            fresh_seeder.start(roots=("models",))
            fresh_seeder.wait(timeout=5.0)

        status = fresh_seeder.get_status()
        assert len(status.errors) > 0
        assert "DB connection failed" in status.errors[0]

    def test_dependencies_unavailable_captured_in_errors(
        self, fresh_seeder: _AssetSeeder
    ):
        with patch("app.assets.seeder.dependencies_available", return_value=False):
            fresh_seeder.start(roots=("models",))
            fresh_seeder.wait(timeout=5.0)

        status = fresh_seeder.get_status()
        assert len(status.errors) > 0
        assert "dependencies" in status.errors[0].lower()

    def test_thread_crash_resets_state_to_idle(self, fresh_seeder: _AssetSeeder):
        # An unexpected exception early in the scan must both record an error
        # and return the state machine to IDLE (no stuck RUNNING state).
        with (
            patch("app.assets.seeder.dependencies_available", return_value=True),
            patch(
                "app.assets.seeder.sync_root_safely",
                side_effect=RuntimeError("Unexpected crash"),
            ),
        ):
            fresh_seeder.start(roots=("models",))
            fresh_seeder.wait(timeout=5.0)

        status = fresh_seeder.get_status()
        assert status.state == State.IDLE
        assert len(status.errors) > 0
|
||||
|
||||
|
||||
class TestSeederThreadSafety:
    """Test thread safety of concurrent operations."""

    def test_concurrent_start_calls_spawn_only_one_thread(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        barrier = threading.Event()

        def slow_collect(*args):
            barrier.wait(timeout=5.0)
            return []

        with patch(
            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
        ):
            results = []

            def try_start():
                results.append(fresh_seeder.start(roots=("models",)))

            # Race ten threads at start(); exactly one may win.
            threads = [threading.Thread(target=try_start) for _ in range(10)]
            for t in threads:
                t.start()
            for t in threads:
                t.join()

            barrier.set()

        # True counts as 1, False as 0 — exactly one start succeeded.
        assert sum(results) == 1

    def test_get_status_safe_during_scan(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        barrier = threading.Event()
        reached = threading.Event()

        def slow_collect(*args):
            reached.set()
            barrier.wait(timeout=5.0)
            return []

        with patch(
            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
        ):
            fresh_seeder.start(roots=("models",))
            assert reached.wait(timeout=2.0)

            # Hammer get_status() while the worker runs; it must never raise
            # and must always report a coherent state.
            statuses = []
            for _ in range(100):
                statuses.append(fresh_seeder.get_status())

            barrier.set()

        assert all(
            s.state in (State.RUNNING, State.IDLE, State.CANCELLING)
            for s in statuses
        )
|
||||
|
||||
|
||||
class TestSeederMarkMissing:
    """Test mark_missing_outside_prefixes behavior."""

    def test_mark_missing_when_idle(self, fresh_seeder: _AssetSeeder):
        with (
            patch("app.assets.seeder.dependencies_available", return_value=True),
            patch(
                "app.assets.seeder.get_all_known_prefixes",
                return_value=["/models", "/input", "/output"],
            ),
            patch(
                "app.assets.seeder.mark_missing_outside_prefixes_safely", return_value=5
            ) as mock_mark,
        ):
            result = fresh_seeder.mark_missing_outside_prefixes()
            # Returns the count from the underlying helper, called with every
            # known prefix exactly once.
            assert result == 5
            mock_mark.assert_called_once_with(["/models", "/input", "/output"])

    def test_mark_missing_raises_when_running(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        barrier = threading.Event()
        reached = threading.Event()

        def slow_collect(*args):
            reached.set()
            barrier.wait(timeout=5.0)
            return []

        with patch(
            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
        ):
            fresh_seeder.start(roots=("models",))
            assert reached.wait(timeout=2.0)

            # Pruning while a scan is in flight is refused with a typed error.
            with pytest.raises(ScanInProgressError):
                fresh_seeder.mark_missing_outside_prefixes()

            barrier.set()

    def test_mark_missing_returns_zero_when_dependencies_unavailable(
        self, fresh_seeder: _AssetSeeder
    ):
        with patch("app.assets.seeder.dependencies_available", return_value=False):
            result = fresh_seeder.mark_missing_outside_prefixes()
            assert result == 0

    def test_prune_first_flag_triggers_mark_missing_before_scan(
        self, fresh_seeder: _AssetSeeder
    ):
        # Record invocation order to prove pruning happens before any root sync.
        call_order = []

        def track_mark(prefixes):
            call_order.append("mark_missing")
            return 3

        def track_sync(root):
            call_order.append(f"sync_{root}")
            return set()

        with (
            patch("app.assets.seeder.dependencies_available", return_value=True),
            patch("app.assets.seeder.get_all_known_prefixes", return_value=["/models"]),
            patch("app.assets.seeder.mark_missing_outside_prefixes_safely", side_effect=track_mark),
            patch("app.assets.seeder.sync_root_safely", side_effect=track_sync),
            patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
            patch("app.assets.seeder.build_asset_specs", return_value=([], set(), 0)),
            patch("app.assets.seeder.insert_asset_specs", return_value=0),
            patch("app.assets.seeder.get_unenriched_assets_for_roots", return_value=[]),
            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
        ):
            fresh_seeder.start(roots=("models",), prune_first=True)
            fresh_seeder.wait(timeout=5.0)

        assert call_order[0] == "mark_missing"
        assert "sync_models" in call_order
|
||||
|
||||
|
||||
class TestSeederPhases:
    """Test phased scanning behavior.

    Each test patches build_asset_specs (fast phase) and
    get_unenriched_assets_for_roots (enrich phase) with tracking stubs, then
    checks which phases actually ran for the chosen entry point.
    """

    def test_start_fast_only_runs_fast_phase(self, fresh_seeder: _AssetSeeder):
        """Verify start_fast only runs the fast phase."""
        fast_called = []
        enrich_called = []

        def track_fast(*args, **kwargs):
            fast_called.append(True)
            return ([], set(), 0)

        def track_enrich(*args, **kwargs):
            enrich_called.append(True)
            return []

        with (
            patch("app.assets.seeder.dependencies_available", return_value=True),
            patch("app.assets.seeder.sync_root_safely", return_value=set()),
            patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
            patch("app.assets.seeder.build_asset_specs", side_effect=track_fast),
            patch("app.assets.seeder.insert_asset_specs", return_value=0),
            patch("app.assets.seeder.get_unenriched_assets_for_roots", side_effect=track_enrich),
            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
        ):
            fresh_seeder.start_fast(roots=("models",))
            fresh_seeder.wait(timeout=5.0)

        assert len(fast_called) == 1
        assert len(enrich_called) == 0

    def test_start_enrich_only_runs_enrich_phase(self, fresh_seeder: _AssetSeeder):
        """Verify start_enrich only runs the enrich phase."""
        fast_called = []
        enrich_called = []

        def track_fast(*args, **kwargs):
            fast_called.append(True)
            return ([], set(), 0)

        def track_enrich(*args, **kwargs):
            enrich_called.append(True)
            return []

        with (
            patch("app.assets.seeder.dependencies_available", return_value=True),
            patch("app.assets.seeder.sync_root_safely", return_value=set()),
            patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
            patch("app.assets.seeder.build_asset_specs", side_effect=track_fast),
            patch("app.assets.seeder.insert_asset_specs", return_value=0),
            patch("app.assets.seeder.get_unenriched_assets_for_roots", side_effect=track_enrich),
            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
        ):
            fresh_seeder.start_enrich(roots=("models",))
            fresh_seeder.wait(timeout=5.0)

        assert len(fast_called) == 0
        assert len(enrich_called) == 1

    def test_full_scan_runs_both_phases(self, fresh_seeder: _AssetSeeder):
        """Verify full scan runs both fast and enrich phases."""
        fast_called = []
        enrich_called = []

        def track_fast(*args, **kwargs):
            fast_called.append(True)
            return ([], set(), 0)

        def track_enrich(*args, **kwargs):
            enrich_called.append(True)
            return []

        with (
            patch("app.assets.seeder.dependencies_available", return_value=True),
            patch("app.assets.seeder.sync_root_safely", return_value=set()),
            patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
            patch("app.assets.seeder.build_asset_specs", side_effect=track_fast),
            patch("app.assets.seeder.insert_asset_specs", return_value=0),
            patch("app.assets.seeder.get_unenriched_assets_for_roots", side_effect=track_enrich),
            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
        ):
            fresh_seeder.start(roots=("models",), phase=ScanPhase.FULL)
            fresh_seeder.wait(timeout=5.0)

        assert len(fast_called) == 1
        assert len(enrich_called) == 1
|
||||
|
||||
|
||||
class TestSeederPauseResume:
    """Test pause/resume behavior.

    Each test patches ``collect_paths_for_roots`` with a stub that blocks on
    an event, so the scan can be observed (paused, resumed, cancelled) while
    it is mid-flight rather than racing it to completion.
    """

    @staticmethod
    def _blocking_collect():
        """Build a blocking ``collect_paths_for_roots`` stub.

        Returns:
            (barrier, reached, patcher) where ``reached`` is set as soon as
            the stub is entered, the stub then waits (up to 5s) on ``barrier``
            before returning an empty path list, and ``patcher`` is the
            unstarted ``unittest.mock.patch`` object for the stub.
        """
        barrier = threading.Event()
        reached = threading.Event()

        def slow_collect(*args):
            reached.set()
            barrier.wait(timeout=5.0)
            return []

        patcher = patch(
            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
        )
        return barrier, reached, patcher

    def test_pause_transitions_to_paused(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        """pause() on a running scan succeeds and reports PAUSED."""
        barrier, reached, patcher = self._blocking_collect()
        with patcher:
            fresh_seeder.start(roots=("models",))
            assert reached.wait(timeout=2.0)

            paused = fresh_seeder.pause()
            assert paused is True
            assert fresh_seeder.get_status().state == State.PAUSED

            # Unblock the worker so the scan can finish during teardown.
            barrier.set()

    def test_pause_when_idle_returns_false(self, fresh_seeder: _AssetSeeder):
        """pause() with no scan running is a no-op that returns False."""
        paused = fresh_seeder.pause()
        assert paused is False

    def test_resume_returns_to_running(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        """resume() after pause() returns the seeder to RUNNING."""
        barrier, reached, patcher = self._blocking_collect()
        with patcher:
            fresh_seeder.start(roots=("models",))
            assert reached.wait(timeout=2.0)

            fresh_seeder.pause()
            assert fresh_seeder.get_status().state == State.PAUSED

            resumed = fresh_seeder.resume()
            assert resumed is True
            assert fresh_seeder.get_status().state == State.RUNNING

            barrier.set()

    def test_resume_when_not_paused_returns_false(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        """resume() while RUNNING (not paused) is a no-op returning False."""
        barrier, reached, patcher = self._blocking_collect()
        with patcher:
            fresh_seeder.start(roots=("models",))
            assert reached.wait(timeout=2.0)

            resumed = fresh_seeder.resume()
            assert resumed is False

            barrier.set()

    def test_cancel_while_paused_works(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        """cancel() succeeds from PAUSED and the seeder returns to IDLE."""
        barrier, reached_checkpoint, patcher = self._blocking_collect()
        with patcher:
            fresh_seeder.start(roots=("models",))
            assert reached_checkpoint.wait(timeout=2.0)

            fresh_seeder.pause()
            assert fresh_seeder.get_status().state == State.PAUSED

            cancelled = fresh_seeder.cancel()
            assert cancelled is True

            # Release the worker; the cancelled scan must wind down to IDLE.
            barrier.set()
            fresh_seeder.wait(timeout=5.0)
            assert fresh_seeder.get_status().state == State.IDLE
|
||||
|
||||
class TestSeederStopRestart:
    """Test stop and restart behavior."""

    # Single patch stack shared by the restart-parameter tests; the collect
    # hook records the roots tuple passed to each scan so the tests can
    # verify which parameters the restarted scan actually used.
    def _seed_then_restart(self, seeder: _AssetSeeder, first_roots, restart_kwargs):
        """Run a scan with *first_roots*, then restart with *restart_kwargs*.

        Returns:
            The list of ``roots`` tuples seen by ``collect_paths_for_roots``,
            one entry per scan (so two entries on success).
        """
        collected_roots = []

        def track_collect(roots):
            collected_roots.append(roots)
            return []

        with (
            patch("app.assets.seeder.dependencies_available", return_value=True),
            patch("app.assets.seeder.sync_root_safely", return_value=set()),
            patch("app.assets.seeder.collect_paths_for_roots", side_effect=track_collect),
            patch("app.assets.seeder.build_asset_specs", return_value=([], set(), 0)),
            patch("app.assets.seeder.insert_asset_specs", return_value=0),
            patch("app.assets.seeder.get_unenriched_assets_for_roots", return_value=[]),
            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
        ):
            seeder.start(roots=first_roots)
            seeder.wait(timeout=5.0)

            seeder.restart(**restart_kwargs)
            seeder.wait(timeout=5.0)

        return collected_roots

    def test_stop_is_alias_for_cancel(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        """stop() on a running scan behaves like cancel()."""
        barrier = threading.Event()
        reached = threading.Event()

        def slow_collect(*args):
            reached.set()
            barrier.wait(timeout=5.0)
            return []

        with patch(
            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
        ):
            fresh_seeder.start(roots=("models",))
            assert reached.wait(timeout=2.0)

            stopped = fresh_seeder.stop()
            assert stopped is True
            assert fresh_seeder.get_status().state == State.CANCELLING

            barrier.set()

    def test_restart_cancels_and_starts_new_scan(
        self, fresh_seeder: _AssetSeeder, mock_dependencies
    ):
        """restart() tears down the in-flight scan and launches a fresh one."""
        barrier = threading.Event()
        reached = threading.Event()
        start_count = 0

        def slow_collect(*args):
            nonlocal start_count
            start_count += 1
            # Only the first scan blocks; the restarted scan runs freely.
            if start_count == 1:
                reached.set()
                barrier.wait(timeout=5.0)
            return []

        with patch(
            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
        ):
            fresh_seeder.start(roots=("models",))
            assert reached.wait(timeout=2.0)

            barrier.set()
            restarted = fresh_seeder.restart()
            assert restarted is True

            fresh_seeder.wait(timeout=5.0)
            assert start_count == 2

    def test_restart_preserves_previous_params(self, fresh_seeder: _AssetSeeder):
        """Verify restart uses previous params when not overridden."""
        collected = self._seed_then_restart(fresh_seeder, ("input", "output"), {})
        assert collected == [("input", "output"), ("input", "output")]

    def test_restart_can_override_params(self, fresh_seeder: _AssetSeeder):
        """Verify restart can override previous params."""
        collected = self._seed_then_restart(
            fresh_seeder, ("models",), {"roots": ("input",)}
        )
        assert collected == [("models",), ("input",)]
|
||||
|
||||
|
||||
def _make_row(ref_id: str, asset_id: str = "a1") -> UnenrichedReferenceRow:
    """Build a minimal unenriched-reference row for enrich-phase tests."""
    return UnenrichedReferenceRow(
        reference_id=ref_id,
        asset_id=asset_id,
        file_path=f"/fake/{ref_id}.bin",
        enrichment_level=0,
    )
|
||||
|
||||
|
||||
class TestEnrichPhaseDefensiveLogic:
    """Test skip_ids filtering and consecutive_empty termination.

    These tests stub out the DB/enrichment layer so the seeder's enrich loop
    runs against scripted batches, and then verify its two defenses: failed
    references are excluded from later batches, and the loop stops rather
    than spinning forever on zero-progress batches.
    """

    def test_failed_refs_are_skipped_on_subsequent_batches(
        self, fresh_seeder: _AssetSeeder,
    ):
        """References that fail enrichment are filtered out of future batches."""
        row_a = _make_row("r1")
        row_b = _make_row("r2")
        call_count = 0

        def fake_get_unenriched(*args, **kwargs):
            # Serve both rows for the first two queries, then report done.
            nonlocal call_count
            call_count += 1
            if call_count <= 2:
                return [row_a, row_b]
            return []

        # One sub-list per batch: the reference ids the seeder attempted.
        enriched_refs: list[list[str]] = []

        def fake_enrich(rows, **kwargs):
            ref_ids = [r.reference_id for r in rows]
            enriched_refs.append(ref_ids)
            # r1 always fails, r2 succeeds
            failed = [r.reference_id for r in rows if r.reference_id == "r1"]
            enriched = len(rows) - len(failed)
            return enriched, failed

        with (
            patch("app.assets.seeder.dependencies_available", return_value=True),
            patch("app.assets.seeder.sync_root_safely", return_value=set()),
            patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
            patch("app.assets.seeder.build_asset_specs", return_value=([], set(), 0)),
            patch("app.assets.seeder.insert_asset_specs", return_value=0),
            patch("app.assets.seeder.get_unenriched_assets_for_roots", side_effect=fake_get_unenriched),
            patch("app.assets.seeder.enrich_assets_batch", side_effect=fake_enrich),
        ):
            fresh_seeder.start(roots=("models",), phase=ScanPhase.ENRICH)
            fresh_seeder.wait(timeout=5.0)

        # First batch: both refs attempted
        assert "r1" in enriched_refs[0]
        assert "r2" in enriched_refs[0]
        # Second batch: r1 filtered out
        assert "r1" not in enriched_refs[1]
        assert "r2" in enriched_refs[1]

    def test_stops_after_consecutive_empty_batches(
        self, fresh_seeder: _AssetSeeder,
    ):
        """Enrich phase terminates once skip_ids filtering empties a batch.

        The DB stub always returns the same permanently-failing row, so after
        batch 1 its reference id enters skip_ids; batch 2's query result is
        filtered down to nothing and the loop breaks — two queries in total.
        """
        row = _make_row("r1")
        batch_count = 0

        def fake_get_unenriched(*args, **kwargs):
            nonlocal batch_count
            batch_count += 1
            # Always return the same row (simulating a permanently failing ref)
            return [row]

        def fake_enrich(rows, **kwargs):
            # Always fail — zero enriched, all failed
            return 0, [r.reference_id for r in rows]

        with (
            patch("app.assets.seeder.dependencies_available", return_value=True),
            patch("app.assets.seeder.sync_root_safely", return_value=set()),
            patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
            patch("app.assets.seeder.build_asset_specs", return_value=([], set(), 0)),
            patch("app.assets.seeder.insert_asset_specs", return_value=0),
            patch("app.assets.seeder.get_unenriched_assets_for_roots", side_effect=fake_get_unenriched),
            patch("app.assets.seeder.enrich_assets_batch", side_effect=fake_enrich),
        ):
            fresh_seeder.start(roots=("models",), phase=ScanPhase.ENRICH)
            fresh_seeder.wait(timeout=5.0)

        # Batch 1: row returned, enrichment fails, so "r1" joins skip_ids.
        # Batch 2: get_unenriched returns [row] again, skip_ids filters it to
        # an empty list, and the loop breaks via `if not unenriched: break`.
        # Hence exactly two calls to get_unenriched — the consecutive-empty
        # counter (the 3-strike fallback) is never needed on this path.
        assert batch_count == 2

    def test_consecutive_empty_counter_resets_on_success(
        self, fresh_seeder: _AssetSeeder,
    ):
        """A successful batch resets the consecutive empty counter."""
        call_count = 0

        def fake_get_unenriched(*args, **kwargs):
            # Six one-row batches with distinct ref/asset ids, then empty.
            # Distinct ids keep skip_ids from short-circuiting the sequence.
            nonlocal call_count
            call_count += 1
            if call_count <= 6:
                return [_make_row(f"r{call_count}", f"a{call_count}")]
            return []

        def fake_enrich(rows, **kwargs):
            ref_id = rows[0].reference_id
            # Fail batches 1-2, succeed batch 3, fail batches 4-5, succeed batch 6
            if ref_id in ("r1", "r2", "r4", "r5"):
                return 0, [ref_id]
            return 1, []

        with (
            patch("app.assets.seeder.dependencies_available", return_value=True),
            patch("app.assets.seeder.sync_root_safely", return_value=set()),
            patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
            patch("app.assets.seeder.build_asset_specs", return_value=([], set(), 0)),
            patch("app.assets.seeder.insert_asset_specs", return_value=0),
            patch("app.assets.seeder.get_unenriched_assets_for_roots", side_effect=fake_get_unenriched),
            patch("app.assets.seeder.enrich_assets_batch", side_effect=fake_enrich),
        ):
            fresh_seeder.start(roots=("models",), phase=ScanPhase.ENRICH)
            fresh_seeder.wait(timeout=5.0)

        # All 6 batches should run + 1 final call returning empty
        assert call_count == 7
        status = fresh_seeder.get_status()
        assert status.state == State.IDLE
|
||||
Reference in New Issue
Block a user