mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-02-18 22:20:03 +00:00
This change solves the basename collision bug by using UNIQUE(file_path) on the unified asset_references table.

Key changes:

Database:
- Migration 0005 merges asset_cache_states and asset_infos into asset_references
- AssetReference now contains: cache state fields (file_path, mtime_ns, needs_verify, is_missing, enrichment_level) plus info fields (name, owner_id, preview_id, etc.)
- AssetReferenceMeta replaces AssetInfoMeta
- AssetReferenceTag replaces AssetInfoTag
- UNIQUE constraint on file_path prevents duplicate entries for same file

Code:
- New unified query module: asset_reference.py (replaces asset_info.py, cache_state.py)
- Updated scanner, seeder, and services to use AssetReference
- Updated API routes to use reference_id instead of asset_info_id

Tests:
- All 175 unit tests updated and passing
- Integration tests require server environment (not run here)

Amp-Thread-ID: https://ampcode.com/threads/T-019c4fe8-9dcb-75ce-bea8-ea786343a581
Co-authored-by: Amp <amp@ampcode.com>
500 lines
17 KiB
Python
500 lines
17 KiB
Python
"""Tests for cache_state (AssetReference file path) query functions."""
|
|
import pytest
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.assets.database.models import Asset, AssetReference
|
|
from app.assets.database.queries import (
|
|
list_references_by_asset_id,
|
|
upsert_reference,
|
|
get_unreferenced_unhashed_asset_ids,
|
|
delete_assets_by_ids,
|
|
get_references_for_prefixes,
|
|
bulk_update_needs_verify,
|
|
delete_references_by_ids,
|
|
delete_orphaned_seed_asset,
|
|
bulk_insert_references_ignore_conflicts,
|
|
get_references_by_paths_and_asset_ids,
|
|
mark_references_missing_outside_prefixes,
|
|
restore_references_by_paths,
|
|
)
|
|
from app.assets.helpers import select_best_live_path, get_utc_now
|
|
|
|
|
|
def _make_asset(session: Session, hash_val: str | None = None, size: int = 1024) -> Asset:
    """Add a new ``Asset`` to ``session``, flush it, and return it.

    Args:
        session: Session the asset is added to; it is flushed but not committed.
        hash_val: Value stored in the asset's ``hash`` column (``None`` by default).
        size: Value stored in the asset's ``size_bytes`` column.
    """
    new_asset = Asset(hash=hash_val, size_bytes=size)
    session.add(new_asset)
    session.flush()
    return new_asset
|
|
|
|
|
|
def _make_reference(
    session: Session,
    asset: Asset,
    file_path: str,
    name: str = "test",
    mtime_ns: int | None = None,
    needs_verify: bool = False,
) -> AssetReference:
    """Add an ``AssetReference`` pointing at ``asset`` to ``session`` and flush it.

    ``created_at``, ``updated_at`` and ``last_access_time`` all receive the same
    ``get_utc_now()`` value. The session is flushed but not committed.
    """
    timestamp = get_utc_now()
    column_values = {
        "asset_id": asset.id,
        "file_path": file_path,
        "name": name,
        "mtime_ns": mtime_ns,
        "needs_verify": needs_verify,
        "created_at": timestamp,
        "updated_at": timestamp,
        "last_access_time": timestamp,
    }
    reference = AssetReference(**column_values)
    session.add(reference)
    session.flush()
    return reference
|
|
|
|
|
|
class TestListReferencesByAssetId:
    """Tests for ``list_references_by_asset_id``."""

    def test_returns_empty_for_no_references(self, session: Session):
        """An asset that has no references produces an empty result."""
        lonely_asset = _make_asset(session, "hash1")

        found = list_references_by_asset_id(session, asset_id=lonely_asset.id)

        assert list(found) == []

    def test_returns_references_for_asset(self, session: Session):
        """Both references created for the asset are returned (order not checked)."""
        owner = _make_asset(session, "hash1")
        _make_reference(session, owner, "/path/a.bin", name="a")
        _make_reference(session, owner, "/path/b.bin", name="b")
        session.commit()

        found = list_references_by_asset_id(session, asset_id=owner.id)

        found_paths = {reference.file_path for reference in found}
        assert found_paths == {"/path/a.bin", "/path/b.bin"}

    def test_does_not_return_other_assets_references(self, session: Session):
        """Only the queried asset's reference comes back, not the other asset's."""
        wanted = _make_asset(session, "hash1")
        other = _make_asset(session, "hash2")
        _make_reference(session, wanted, "/path/asset1.bin", name="a1")
        _make_reference(session, other, "/path/asset2.bin", name="a2")
        session.commit()

        found = list_references_by_asset_id(session, asset_id=wanted.id)

        assert [reference.file_path for reference in found] == ["/path/asset1.bin"]
|
|
|
|
|
|
class TestSelectBestLivePath:
    """Tests for ``select_best_live_path`` using database-backed references."""

    def test_returns_empty_for_empty_list(self):
        """No candidates at all gives the empty string."""
        assert select_best_live_path([]) == ""

    def test_returns_empty_when_no_files_exist(self, session: Session):
        """A reference whose path is not on disk gives the empty string."""
        owner = _make_asset(session, "hash1")
        dangling = _make_reference(session, owner, "/nonexistent/path.bin")
        session.commit()

        assert select_best_live_path([dangling]) == ""

    def test_prefers_verified_path(self, session: Session, tmp_path):
        """needs_verify=False should be preferred."""
        owner = _make_asset(session, "hash1")

        trusted_file = tmp_path / "verified.bin"
        untrusted_file = tmp_path / "unverified.bin"
        for on_disk in (trusted_file, untrusted_file):
            on_disk.write_bytes(b"data")

        trusted = _make_reference(
            session, owner, str(trusted_file), name="verified", needs_verify=False
        )
        untrusted = _make_reference(
            session, owner, str(untrusted_file), name="unverified", needs_verify=True
        )
        session.commit()

        # The unverified reference is deliberately listed first.
        chosen = select_best_live_path([untrusted, trusted])

        assert chosen == str(trusted_file)

    def test_falls_back_to_existing_unverified(self, session: Session, tmp_path):
        """If all references need verification, return first existing path."""
        owner = _make_asset(session, "hash1")

        present_file = tmp_path / "exists.bin"
        present_file.write_bytes(b"data")

        only_ref = _make_reference(session, owner, str(present_file), needs_verify=True)
        session.commit()

        assert select_best_live_path([only_ref]) == str(present_file)
|
|
|
|
|
|
class TestSelectBestLivePathWithMocking:
    """Tests for ``select_best_live_path`` using a stand-in reference object."""

    def test_handles_missing_file_path_attr(self):
        """Gracefully handle references with None file_path."""

        class PathlessRef:
            # Only the two attributes the original stub defined.
            file_path = None
            needs_verify = False

        candidates = [PathlessRef()]

        assert select_best_live_path(candidates) == ""
|
|
|
|
|
|
class TestUpsertReference:
    """Tests for ``upsert_reference`` and its ``(created, updated)`` result."""

    @pytest.mark.parametrize(
        "initial_mtime,second_mtime,expect_created,expect_updated,final_mtime",
        [
            # New reference creation
            (None, 12345, True, False, 12345),
            # Existing reference, same mtime - no update
            (100, 100, False, False, 100),
            # Existing reference, different mtime - update
            (100, 200, False, True, 200),
        ],
        ids=["new_reference", "existing_no_change", "existing_update_mtime"],
    )
    def test_upsert_scenarios(
        self, session: Session, initial_mtime, second_mtime, expect_created, expect_updated, final_mtime
    ):
        """Optionally seed a reference, upsert again, and check flags and stored mtime."""
        owner = _make_asset(session, "hash1")
        target_path = f"/path_{initial_mtime}_{second_mtime}.bin"
        ref_name = f"file_{initial_mtime}_{second_mtime}"

        def run_upsert(mtime):
            # Same call shape for the seeding upsert and the upsert under test.
            return upsert_reference(
                session, asset_id=owner.id, file_path=target_path, name=ref_name, mtime_ns=mtime
            )

        # Seed a pre-existing reference only for the "existing" scenarios.
        if initial_mtime is not None:
            run_upsert(initial_mtime)
            session.commit()

        # The upsert call under test.
        outcome = run_upsert(second_mtime)
        session.commit()

        was_created, was_updated = outcome
        assert was_created is expect_created
        assert was_updated is expect_updated
        stored = session.query(AssetReference).filter_by(file_path=target_path).one()
        assert stored.mtime_ns == final_mtime

    def test_upsert_restores_missing_reference(self, session: Session):
        """Upserting a reference that was marked missing should restore it."""
        owner = _make_asset(session, "hash1")
        target_path = "/restored/file.bin"

        flagged = _make_reference(session, owner, target_path, mtime_ns=100)
        flagged.is_missing = True
        session.commit()

        was_created, was_updated = upsert_reference(
            session, asset_id=owner.id, file_path=target_path, name="restored", mtime_ns=100
        )
        session.commit()

        assert was_created is False
        assert was_updated is True
        stored = session.query(AssetReference).filter_by(file_path=target_path).one()
        assert stored.is_missing is False
|
|
|
|
|
|
class TestRestoreReferencesByPaths:
    """Tests for ``restore_references_by_paths``."""

    def test_restores_missing_references(self, session: Session):
        """A reference flagged missing is un-flagged and counted once."""
        owner = _make_asset(session, "hash1")
        flagged_path = "/missing/file.bin"
        untouched_path = "/active/file.bin"

        flagged = _make_reference(session, owner, flagged_path, name="missing")
        flagged.is_missing = True
        _make_reference(session, owner, untouched_path, name="active")
        session.commit()

        restored_count = restore_references_by_paths(session, [flagged_path])
        session.commit()

        assert restored_count == 1
        reloaded = session.query(AssetReference).filter_by(file_path=flagged_path).one()
        assert reloaded.is_missing is False

    def test_empty_list_restores_nothing(self, session: Session):
        """An empty path list reports zero restored references."""
        assert restore_references_by_paths(session, []) == 0
|
|
|
|
|
|
class TestMarkReferencesMissingOutsidePrefixes:
    """Tests for ``mark_references_missing_outside_prefixes``."""

    def test_marks_references_missing_outside_prefixes(self, session: Session, tmp_path):
        """Only the reference outside the allowed directory is flagged missing."""
        owner = _make_asset(session, "hash1")

        allowed_dir = tmp_path / "valid"
        allowed_dir.mkdir()
        outside_dir = tmp_path / "invalid"
        outside_dir.mkdir()

        inside_path = str(allowed_dir / "file.bin")
        outside_path = str(outside_dir / "file.bin")

        _make_reference(session, owner, inside_path, name="valid")
        _make_reference(session, owner, outside_path, name="invalid")
        session.commit()

        flagged_count = mark_references_missing_outside_prefixes(session, [str(allowed_dir)])
        session.commit()

        assert flagged_count == 1
        stored = session.query(AssetReference).all()
        assert len(stored) == 2

        inside_ref = next(ref for ref in stored if ref.file_path == inside_path)
        outside_ref = next(ref for ref in stored if ref.file_path == outside_path)
        assert inside_ref.is_missing is False
        assert outside_ref.is_missing is True

    def test_empty_prefixes_marks_nothing(self, session: Session):
        """With no prefixes supplied, the call reports zero flagged references."""
        owner = _make_asset(session, "hash1")
        _make_reference(session, owner, "/some/path.bin")
        session.commit()

        flagged_count = mark_references_missing_outside_prefixes(session, [])

        assert flagged_count == 0
|
|
|
|
|
|
class TestGetUnreferencedUnhashedAssetIds:
    """Tests for ``get_unreferenced_unhashed_asset_ids``."""

    def test_returns_unreferenced_unhashed_assets(self, session: Session):
        """Check which assets are reported as unreferenced and unhashed.

        Four assets are created:
        - unhashed, no references              -> expected in the result
        - unhashed, one active reference       -> expected NOT in the result
        - unhashed, only a missing reference   -> expected in the result
        - hashed, no references                -> expected NOT in the result
        """
        # Unhashed asset (hash=None) with no references (no file_path)
        no_refs = _make_asset(session, hash_val=None)
        # Unhashed asset with active reference (not unreferenced)
        with_active_ref = _make_asset(session, hash_val=None)
        _make_reference(session, with_active_ref, "/has/ref.bin", name="has_ref")
        # Unhashed asset with only missing reference (should be unreferenced)
        with_missing_ref = _make_asset(session, hash_val=None)
        missing_ref = _make_reference(session, with_missing_ref, "/missing/ref.bin", name="missing_ref")
        missing_ref.is_missing = True
        # Regular asset (hash not None) - should not be returned
        hashed_asset = _make_asset(session, hash_val="blake3:regular")
        session.commit()

        unreferenced = get_unreferenced_unhashed_asset_ids(session)

        assert no_refs.id in unreferenced
        assert with_missing_ref.id in unreferenced
        assert with_active_ref.id not in unreferenced
        # Previously the hashed asset was created but never checked, so the
        # "should not be returned" expectation was not actually enforced.
        assert hashed_asset.id not in unreferenced
|
|
|
|
|
|
class TestDeleteAssetsByIds:
    """Tests for ``delete_assets_by_ids``."""

    def test_deletes_assets_and_references(self, session: Session):
        """After deleting the asset, neither asset nor reference rows remain."""
        doomed = _make_asset(session, "hash1")
        _make_reference(session, doomed, "/test/path.bin", name="test")
        session.commit()

        removed = delete_assets_by_ids(session, [doomed.id])
        session.commit()

        assert removed == 1
        asset_rows = session.query(Asset).count()
        assert asset_rows == 0
        reference_rows = session.query(AssetReference).count()
        assert reference_rows == 0

    def test_empty_list_deletes_nothing(self, session: Session):
        """An empty id list reports zero deletions and leaves the asset in place."""
        _make_asset(session, "hash1")
        session.commit()

        removed = delete_assets_by_ids(session, [])

        assert removed == 0
        assert session.query(Asset).count() == 1
|
|
|
|
|
|
class TestGetReferencesForPrefixes:
    """Tests for ``get_references_for_prefixes``."""

    def test_returns_references_matching_prefix(self, session: Session, tmp_path):
        """Only the reference under the requested directory is returned."""
        owner = _make_asset(session, "hash1")

        wanted_dir = tmp_path / "dir1"
        wanted_dir.mkdir()
        other_dir = tmp_path / "dir2"
        other_dir.mkdir()

        wanted_path = str(wanted_dir / "file.bin")
        other_path = str(other_dir / "file.bin")

        _make_reference(session, owner, wanted_path, name="file1", mtime_ns=100)
        _make_reference(session, owner, other_path, name="file2", mtime_ns=200)
        session.commit()

        matches = get_references_for_prefixes(session, [str(wanted_dir)])

        assert len(matches) == 1
        first_match = matches[0]
        assert first_match.file_path == wanted_path

    def test_empty_prefixes_returns_empty(self, session: Session):
        """With no prefixes supplied, the result equals an empty list."""
        owner = _make_asset(session, "hash1")
        _make_reference(session, owner, "/some/path.bin")
        session.commit()

        matches = get_references_for_prefixes(session, [])

        assert matches == []
|
|
|
|
|
|
class TestBulkSetNeedsVerify:
    """Tests for ``bulk_update_needs_verify``."""

    def test_sets_needs_verify_flag(self, session: Session):
        """Both references end up with ``needs_verify`` True and the count is 2."""
        owner = _make_asset(session, "hash1")
        first = _make_reference(session, owner, "/path1.bin", needs_verify=False)
        second = _make_reference(session, owner, "/path2.bin", needs_verify=False)
        session.commit()

        target_ids = [first.id, second.id]
        changed = bulk_update_needs_verify(session, target_ids, True)
        session.commit()

        assert changed == 2
        # Reload both rows from the database before inspecting the flag.
        for reference in (first, second):
            session.refresh(reference)
        assert first.needs_verify is True
        assert second.needs_verify is True

    def test_empty_list_updates_nothing(self, session: Session):
        """An empty id list reports zero updated rows."""
        assert bulk_update_needs_verify(session, [], True) == 0
|
|
|
|
|
|
class TestDeleteReferencesByIds:
    """Tests for ``delete_references_by_ids``."""

    def test_deletes_references_by_id(self, session: Session):
        """Deleting one of two references leaves exactly one row behind."""
        owner = _make_asset(session, "hash1")
        doomed = _make_reference(session, owner, "/path1.bin")
        _make_reference(session, owner, "/path2.bin")
        session.commit()

        removed = delete_references_by_ids(session, [doomed.id])
        session.commit()

        assert removed == 1
        remaining = session.query(AssetReference).count()
        assert remaining == 1

    def test_empty_list_deletes_nothing(self, session: Session):
        """An empty id list reports zero deletions."""
        assert delete_references_by_ids(session, []) == 0
|
|
|
|
|
|
class TestDeleteOrphanedSeedAsset:
    """Tests for ``delete_orphaned_seed_asset``."""

    @pytest.mark.parametrize(
        "create_asset,expected_deleted,expected_count",
        [
            (True, True, 0),  # Existing asset gets deleted
            (False, False, 0),  # Nonexistent returns False
        ],
        ids=["deletes_existing", "nonexistent_returns_false"],
    )
    def test_delete_orphaned_seed_asset(
        self, session: Session, create_asset, expected_deleted, expected_count
    ):
        """Delete by id, with or without a matching unhashed asset present."""
        if create_asset:
            seeded = _make_asset(session, hash_val=None)
            target_id = seeded.id
            _make_reference(session, seeded, "/test/path.bin", name="test")
            session.commit()
        else:
            # No row is created; the id below matches nothing.
            target_id = "nonexistent-id"

        was_deleted = delete_orphaned_seed_asset(session, target_id)
        if create_asset:
            session.commit()

        assert was_deleted is expected_deleted
        assert session.query(Asset).count() == expected_count
|
|
|
|
|
|
class TestBulkInsertReferencesIgnoreConflicts:
    """Tests for ``bulk_insert_references_ignore_conflicts``."""

    @staticmethod
    def _row(asset_id, file_path, name, mtime_ns, stamp):
        """Build one insert row; ``stamp`` fills all three timestamp keys."""
        return {
            "asset_id": asset_id,
            "file_path": file_path,
            "name": name,
            "mtime_ns": mtime_ns,
            "created_at": stamp,
            "updated_at": stamp,
            "last_access_time": stamp,
        }

    def test_inserts_multiple_references(self, session: Session):
        """Two rows with distinct paths both get inserted."""
        owner = _make_asset(session, "hash1")
        stamp = get_utc_now()
        payload = [
            self._row(owner.id, "/bulk1.bin", "bulk1", 100, stamp),
            self._row(owner.id, "/bulk2.bin", "bulk2", 200, stamp),
        ]

        bulk_insert_references_ignore_conflicts(session, payload)
        session.commit()

        assert session.query(AssetReference).count() == 2

    def test_ignores_conflicts(self, session: Session):
        """A row whose path already exists is skipped; the new path is inserted."""
        owner = _make_asset(session, "hash1")
        _make_reference(session, owner, "/existing.bin", mtime_ns=100)
        session.commit()

        stamp = get_utc_now()
        payload = [
            self._row(owner.id, "/existing.bin", "existing", 999, stamp),
            self._row(owner.id, "/new.bin", "new", 200, stamp),
        ]

        bulk_insert_references_ignore_conflicts(session, payload)
        session.commit()

        assert session.query(AssetReference).count() == 2
        kept = session.query(AssetReference).filter_by(file_path="/existing.bin").one()
        assert kept.mtime_ns == 100  # Original value preserved

    def test_empty_list_is_noop(self, session: Session):
        """Inserting an empty list leaves the table empty."""
        bulk_insert_references_ignore_conflicts(session, [])

        assert session.query(AssetReference).count() == 0
|
|
|
|
|
|
class TestGetReferencesByPathsAndAssetIds:
    """Tests for ``get_references_by_paths_and_asset_ids``."""

    def test_returns_matching_paths(self, session: Session):
        """Paths whose stored asset id matches the mapping are all returned."""
        first_asset = _make_asset(session, "hash1")
        second_asset = _make_asset(session, "hash2")

        _make_reference(session, first_asset, "/path1.bin")
        _make_reference(session, second_asset, "/path2.bin")
        session.commit()

        expected_owner = {
            "/path1.bin": first_asset.id,
            "/path2.bin": second_asset.id,
        }
        matched = get_references_by_paths_and_asset_ids(session, expected_owner)

        assert matched == {"/path1.bin", "/path2.bin"}

    def test_excludes_non_matching_asset_ids(self, session: Session):
        """A path stored under a different asset id is not returned."""
        first_asset = _make_asset(session, "hash1")
        second_asset = _make_asset(session, "hash2")

        _make_reference(session, first_asset, "/path1.bin")
        session.commit()

        # Path exists but with different asset_id
        expected_owner = {"/path1.bin": second_asset.id}
        matched = get_references_by_paths_and_asset_ids(session, expected_owner)

        assert matched == set()

    def test_empty_dict_returns_empty(self, session: Session):
        """An empty mapping gives an empty set."""
        assert get_references_by_paths_and_asset_ids(session, {}) == set()
|