mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-02-18 22:20:03 +00:00
refactor(assets): merge AssetInfo and AssetCacheState into AssetReference
This change solves the basename collision bug by using UNIQUE(file_path) on the unified asset_references table.

Key changes:

Database:
- Migration 0005 merges asset_cache_states and asset_infos into asset_references
- AssetReference now contains: cache state fields (file_path, mtime_ns, needs_verify, is_missing, enrichment_level) plus info fields (name, owner_id, preview_id, etc.)
- AssetReferenceMeta replaces AssetInfoMeta
- AssetReferenceTag replaces AssetInfoTag
- UNIQUE constraint on file_path prevents duplicate entries for the same file

Code:
- New unified query module: asset_reference.py (replaces asset_info.py, cache_state.py)
- Updated scanner, seeder, and services to use AssetReference
- Updated API routes to use reference_id instead of asset_info_id

Tests:
- All 175 unit tests updated and passing
- Integration tests require a server environment (not run here)

Amp-Thread-ID: https://ampcode.com/threads/T-019c4fe8-9dcb-75ce-bea8-ea786343a581
Co-authored-by: Amp <amp@ampcode.com>
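The core of the fix, distilled: identity for a file-backed reference is now the full path, not the basename, so two same-named files in different folders occupy two rows, while re-registering the same path is rejected. A minimal sketch of the constraint's behavior (toy table, plain sqlite3; not the project's actual DDL):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE refs (id INTEGER PRIMARY KEY, name TEXT, file_path TEXT UNIQUE)"
)
# Same basename in two directories: two distinct rows, no collision.
conn.execute(
    "INSERT INTO refs (name, file_path) VALUES ('model.safetensors', '/models/a/model.safetensors')"
)
conn.execute(
    "INSERT INTO refs (name, file_path) VALUES ('model.safetensors', '/models/b/model.safetensors')"
)
# The same path twice violates UNIQUE(file_path): one file, one row.
try:
    conn.execute(
        "INSERT INTO refs (name, file_path) VALUES ('dup', '/models/a/model.safetensors')"
    )
except sqlite3.IntegrityError as e:
    print(e)  # UNIQUE constraint failed: refs.file_path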
@@ -0,0 +1,32 @@
"""
Drop unique constraint on assets_info (asset_id, owner_id, name)

Allow multiple files with the same name to reference the same asset.

Revision ID: 0004_drop_asset_info_unique
Revises: 0003_add_enrichment_level
Create Date: 2025-02-11 00:00:00
"""

from alembic import op
import sqlalchemy as sa


revision = "0004_drop_asset_info_unique"
down_revision = "0003_add_enrichment_level"
branch_labels = None
depends_on = None


def upgrade() -> None:
    with op.batch_alter_table("assets_info") as batch_op:
        batch_op.drop_constraint(
            "uq_assets_info_asset_owner_name", type_="unique"
        )


def downgrade() -> None:
    with op.batch_alter_table("assets_info") as batch_op:
        batch_op.create_unique_constraint(
            "uq_assets_info_asset_owner_name",
            ["asset_id", "owner_id", "name"],
        )
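A note on the batch_alter_table usage above: SQLite has no ALTER TABLE ... DROP CONSTRAINT, so Alembic's batch mode recreates the table without the constraint and copies the rows across. A toy sqlite3 sketch of that recreate-and-copy dance (hypothetical table, not the real schema):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE t (a TEXT, b TEXT, UNIQUE (a, b))")
conn.execute("INSERT INTO t VALUES ('x', 'y')")
# No DROP CONSTRAINT in SQLite: create a new table, copy, drop, rename.
conn.execute("CREATE TABLE t_new (a TEXT, b TEXT)")
conn.execute("INSERT INTO t_new SELECT a, b FROM t")
conn.execute("DROP TABLE t")
conn.execute("ALTER TABLE t_new RENAME TO t")
# A duplicate that UNIQUE(a, b) used to reject is now accepted.
conn.execute("INSERT INTO t VALUES ('x', 'y')")
print(conn.execute("SELECT COUNT(*) FROM t").fetchone()[0])  # 2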
422
alembic_db/versions/0005_merge_to_asset_references.py
Normal file
@@ -0,0 +1,422 @@
"""
|
||||
Merge AssetInfo and AssetCacheState into unified asset_references table.
|
||||
|
||||
This migration:
|
||||
1. Creates asset_references table with combined columns
|
||||
2. Creates asset_reference_tags and asset_reference_meta tables
|
||||
3. Migrates data from assets_info and asset_cache_state, merging where unambiguous
|
||||
4. Migrates tags and metadata
|
||||
5. Drops old tables
|
||||
|
||||
Revision ID: 0005_merge_to_asset_references
|
||||
Revises: 0004_drop_asset_info_unique_constraint
|
||||
Create Date: 2025-02-11
|
||||
"""
|
||||
# ruff: noqa: E501
|
||||
|
||||
import os
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import text
|
||||
|
||||
revision = "0005_merge_to_asset_references"
|
||||
down_revision = "0004_drop_asset_info_unique_constraint"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    conn = op.get_bind()

    # Step 1: Create asset_references table
    op.create_table(
        "asset_references",
        sa.Column("id", sa.String(length=36), primary_key=True),
        sa.Column(
            "asset_id",
            sa.String(length=36),
            sa.ForeignKey("assets.id", ondelete="CASCADE"),
            nullable=False,
        ),
        # From AssetCacheState
        sa.Column("file_path", sa.Text(), nullable=True),
        sa.Column("mtime_ns", sa.BigInteger(), nullable=True),
        sa.Column(
            "needs_verify",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        ),
        sa.Column(
            "is_missing", sa.Boolean(), nullable=False, server_default=sa.text("false")
        ),
        sa.Column("enrichment_level", sa.Integer(), nullable=False, server_default="0"),
        # From AssetInfo
        sa.Column("owner_id", sa.String(length=128), nullable=False, server_default=""),
        sa.Column("name", sa.String(length=512), nullable=False),
        sa.Column(
            "preview_id",
            sa.String(length=36),
            sa.ForeignKey("assets.id", ondelete="SET NULL"),
            nullable=True,
        ),
        sa.Column("user_metadata", sa.JSON(), nullable=True),
        sa.Column("created_at", sa.DateTime(timezone=False), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=False), nullable=False),
        sa.Column("last_access_time", sa.DateTime(timezone=False), nullable=False),
        # Constraints
        sa.CheckConstraint(
            "(mtime_ns IS NULL) OR (mtime_ns >= 0)", name="ck_ar_mtime_nonneg"
        ),
        sa.CheckConstraint(
            "enrichment_level >= 0 AND enrichment_level <= 2",
            name="ck_ar_enrichment_level_range",
        ),
    )

    # Create unique index on file_path where not null (partial unique).
    # SQLite UNIQUE on nullable columns works as expected.
    op.create_index(
        "uq_asset_references_file_path",
        "asset_references",
        ["file_path"],
        unique=True,
    )
    op.create_index("ix_asset_references_asset_id", "asset_references", ["asset_id"])
    op.create_index("ix_asset_references_owner_id", "asset_references", ["owner_id"])
    op.create_index("ix_asset_references_name", "asset_references", ["name"])
    op.create_index(
        "ix_asset_references_is_missing", "asset_references", ["is_missing"]
    )
    op.create_index(
        "ix_asset_references_enrichment_level", "asset_references", ["enrichment_level"]
    )
    op.create_index(
        "ix_asset_references_created_at", "asset_references", ["created_at"]
    )
    op.create_index(
        "ix_asset_references_last_access_time", "asset_references", ["last_access_time"]
    )
    op.create_index(
        "ix_asset_references_owner_name", "asset_references", ["owner_id", "name"]
    )

    # Step 2: Create asset_reference_tags table
    op.create_table(
        "asset_reference_tags",
        sa.Column(
            "asset_reference_id",
            sa.String(length=36),
            sa.ForeignKey("asset_references.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "tag_name",
            sa.String(length=512),
            sa.ForeignKey("tags.name", ondelete="RESTRICT"),
            nullable=False,
        ),
        sa.Column(
            "origin", sa.String(length=32), nullable=False, server_default="manual"
        ),
        sa.Column("added_at", sa.DateTime(timezone=False), nullable=False),
        sa.PrimaryKeyConstraint(
            "asset_reference_id", "tag_name", name="pk_asset_reference_tags"
        ),
    )
    op.create_index(
        "ix_asset_reference_tags_tag_name", "asset_reference_tags", ["tag_name"]
    )
    op.create_index(
        "ix_asset_reference_tags_asset_reference_id",
        "asset_reference_tags",
        ["asset_reference_id"],
    )

    # Step 3: Create asset_reference_meta table
    op.create_table(
        "asset_reference_meta",
        sa.Column(
            "asset_reference_id",
            sa.String(length=36),
            sa.ForeignKey("asset_references.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("key", sa.String(length=256), nullable=False),
        sa.Column("ordinal", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("val_str", sa.String(length=2048), nullable=True),
        sa.Column("val_num", sa.Numeric(38, 10), nullable=True),
        sa.Column("val_bool", sa.Boolean(), nullable=True),
        sa.Column("val_json", sa.JSON(), nullable=True),
        sa.PrimaryKeyConstraint(
            "asset_reference_id", "key", "ordinal", name="pk_asset_reference_meta"
        ),
    )
    op.create_index("ix_asset_reference_meta_key", "asset_reference_meta", ["key"])
    op.create_index(
        "ix_asset_reference_meta_key_val_str",
        "asset_reference_meta",
        ["key", "val_str"],
    )
    op.create_index(
        "ix_asset_reference_meta_key_val_num",
        "asset_reference_meta",
        ["key", "val_num"],
    )
    op.create_index(
        "ix_asset_reference_meta_key_val_bool",
        "asset_reference_meta",
        ["key", "val_bool"],
    )

    # Step 4: Migrate data
    # Create mapping from cache_state to info that should absorb it.
    # Merge when: same asset_id AND exactly one cache_state AND basename == name
    now = datetime.utcnow().isoformat()

    # Find unambiguous matches: assets_info rows that have exactly one matching cache_state
    # where basename(file_path) == name AND same asset_id
    # We'll do this in Python for clarity and SQLite compatibility

    # Get all assets_info rows
    info_rows = conn.execute(
        text("""
            SELECT id, owner_id, name, asset_id, preview_id, user_metadata,
                   created_at, updated_at, last_access_time
            FROM assets_info
        """)
    ).fetchall()

    # Get all asset_cache_state rows
    cache_rows = conn.execute(
        text("""
            SELECT id, asset_id, file_path, mtime_ns, needs_verify, is_missing, enrichment_level
            FROM asset_cache_state
        """)
    ).fetchall()

    # Build mapping: asset_id -> list of cache_state rows
    cache_by_asset: dict = {}
    for row in cache_rows:
        (
            cache_id,
            asset_id,
            file_path,
            mtime_ns,
            needs_verify,
            is_missing,
            enrichment_level,
        ) = row
        if asset_id not in cache_by_asset:
            cache_by_asset[asset_id] = []
        cache_by_asset[asset_id].append(
            {
                "cache_id": cache_id,
                "file_path": file_path,
                "mtime_ns": mtime_ns,
                "needs_verify": needs_verify,
                "is_missing": is_missing,
                "enrichment_level": enrichment_level,
            }
        )

    # Track which cache_states get merged (so we don't insert them separately)
    merged_cache_ids: set = set()
    # Track info_id -> cache_data for merged rows
    info_to_cache: dict = {}

    for info_row in info_rows:
        (
            info_id,
            owner_id,
            name,
            asset_id,
            preview_id,
            user_metadata,
            created_at,
            updated_at,
            last_access,
        ) = info_row
        caches = cache_by_asset.get(asset_id, [])

        # Only merge if exactly one cache_state AND basename matches
        if len(caches) == 1:
            cache = caches[0]
            basename = os.path.basename(cache["file_path"])
            if basename == name:
                merged_cache_ids.add(cache["cache_id"])
                info_to_cache[info_id] = cache

    # Insert merged and non-merged assets_info rows into asset_references
    for info_row in info_rows:
        (
            info_id,
            owner_id,
            name,
            asset_id,
            preview_id,
            user_metadata,
            created_at,
            updated_at,
            last_access,
        ) = info_row

        cache = info_to_cache.get(info_id)
        if cache:
            # Merged row: has file_path and cache data
            conn.execute(
                text("""
                    INSERT INTO asset_references (
                        id, asset_id, file_path, mtime_ns, needs_verify, is_missing,
                        enrichment_level, owner_id, name, preview_id, user_metadata,
                        created_at, updated_at, last_access_time
                    ) VALUES (
                        :id, :asset_id, :file_path, :mtime_ns, :needs_verify, :is_missing,
                        :enrichment_level, :owner_id, :name, :preview_id, :user_metadata,
                        :created_at, :updated_at, :last_access_time
                    )
                """),
                {
                    "id": info_id,
                    "asset_id": asset_id,
                    "file_path": cache["file_path"],
                    "mtime_ns": cache["mtime_ns"],
                    "needs_verify": cache["needs_verify"],
                    "is_missing": cache["is_missing"],
                    "enrichment_level": cache["enrichment_level"],
                    "owner_id": owner_id or "",
                    "name": name,
                    "preview_id": preview_id,
                    "user_metadata": user_metadata,
                    "created_at": created_at,
                    "updated_at": updated_at,
                    "last_access_time": last_access,
                },
            )
        else:
            # Non-merged row: no file_path
            conn.execute(
                text("""
                    INSERT INTO asset_references (
                        id, asset_id, file_path, mtime_ns, needs_verify, is_missing,
                        enrichment_level, owner_id, name, preview_id, user_metadata,
                        created_at, updated_at, last_access_time
                    ) VALUES (
                        :id, :asset_id, NULL, NULL, false, false, 0,
                        :owner_id, :name, :preview_id, :user_metadata,
                        :created_at, :updated_at, :last_access_time
                    )
                """),
                {
                    "id": info_id,
                    "asset_id": asset_id,
                    "owner_id": owner_id or "",
                    "name": name,
                    "preview_id": preview_id,
                    "user_metadata": user_metadata,
                    "created_at": created_at,
                    "updated_at": updated_at,
                    "last_access_time": last_access,
                },
            )

    # Insert remaining (non-merged) cache_state rows as new asset_references
    for cache_row in cache_rows:
        (
            cache_id,
            asset_id,
            file_path,
            mtime_ns,
            needs_verify,
            is_missing,
            enrichment_level,
        ) = cache_row
        if cache_id in merged_cache_ids:
            continue

        new_id = str(uuid.uuid4())
        basename = os.path.basename(file_path) if file_path else "unknown"

        conn.execute(
            text("""
                INSERT INTO asset_references (
                    id, asset_id, file_path, mtime_ns, needs_verify, is_missing,
                    enrichment_level, owner_id, name, preview_id, user_metadata,
                    created_at, updated_at, last_access_time
                ) VALUES (
                    :id, :asset_id, :file_path, :mtime_ns, :needs_verify, :is_missing,
                    :enrichment_level, '', :name, NULL, NULL,
                    :now, :now, :now
                )
            """),
            {
                "id": new_id,
                "asset_id": asset_id,
                "file_path": file_path,
                "mtime_ns": mtime_ns,
                "needs_verify": needs_verify,
                "is_missing": is_missing,
                "enrichment_level": enrichment_level,
                "name": basename,
                "now": now,
            },
        )

    # Step 5: Migrate tags (asset_info_id maps directly to asset_reference_id since we reused IDs)
    conn.execute(
        text("""
            INSERT INTO asset_reference_tags (asset_reference_id, tag_name, origin, added_at)
            SELECT asset_info_id, tag_name, origin, added_at
            FROM asset_info_tags
            WHERE asset_info_id IN (SELECT id FROM asset_references)
        """)
    )

    # Step 6: Migrate metadata
    conn.execute(
        text("""
            INSERT INTO asset_reference_meta (asset_reference_id, key, ordinal, val_str, val_num, val_bool, val_json)
            SELECT asset_info_id, key, ordinal, val_str, val_num, val_bool, val_json
            FROM asset_info_meta
            WHERE asset_info_id IN (SELECT id FROM asset_references)
        """)
    )

    # Step 7: Drop old tables
    op.drop_index("ix_asset_info_meta_key_val_bool", table_name="asset_info_meta")
    op.drop_index("ix_asset_info_meta_key_val_num", table_name="asset_info_meta")
    op.drop_index("ix_asset_info_meta_key_val_str", table_name="asset_info_meta")
    op.drop_index("ix_asset_info_meta_key", table_name="asset_info_meta")
    op.drop_table("asset_info_meta")

    op.drop_index("ix_asset_info_tags_asset_info_id", table_name="asset_info_tags")
    op.drop_index("ix_asset_info_tags_tag_name", table_name="asset_info_tags")
    op.drop_table("asset_info_tags")

    op.drop_index("ix_asset_cache_state_asset_id", table_name="asset_cache_state")
    op.drop_index("ix_asset_cache_state_file_path", table_name="asset_cache_state")
    op.drop_index("ix_asset_cache_state_is_missing", table_name="asset_cache_state")
    op.drop_index(
        "ix_asset_cache_state_enrichment_level", table_name="asset_cache_state"
    )
    op.drop_table("asset_cache_state")

    op.drop_index("ix_assets_info_owner_name", table_name="assets_info")
    op.drop_index("ix_assets_info_last_access_time", table_name="assets_info")
    op.drop_index("ix_assets_info_created_at", table_name="assets_info")
    op.drop_index("ix_assets_info_name", table_name="assets_info")
    op.drop_index("ix_assets_info_asset_id", table_name="assets_info")
    op.drop_index("ix_assets_info_owner_id", table_name="assets_info")
    op.drop_table("assets_info")


def downgrade() -> None:
    # This is a complex migration - downgrade would require careful data splitting
    # For safety, we don't support automatic downgrade
    raise NotImplementedError(
        "Downgrade from 0005_merge_to_asset_references is not supported. "
        "Please restore from backup if needed."
    )
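The Step 4 merge heuristic is the subtle part of this migration: an assets_info row absorbs a cache state only when the asset has exactly one cache state and that file's basename equals the row's name; anything ambiguous stays as separate reference rows. A standalone restatement of that rule with a quick check (hypothetical data, not taken from a real database):

import os

def should_merge(info_name: str, caches: list[dict]) -> bool:
    # Mirrors the migration's rule: exactly one cache state for the
    # asset, and the file's basename matches the info row's name.
    return (
        len(caches) == 1
        and os.path.basename(caches[0]["file_path"]) == info_name
    )

# One cache state whose basename matches -> merged into the info row.
print(should_merge("model.safetensors", [{"file_path": "/models/a/model.safetensors"}]))  # True
# Two cache states for the same asset -> ambiguous, kept as separate rows.
print(should_merge("model.safetensors", [
    {"file_path": "/models/a/model.safetensors"},
    {"file_path": "/models/b/model.safetensors"},
]))  # False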
@@ -43,10 +43,10 @@ UUID_RE = r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
 
 
 def get_query_dict(request: web.Request) -> dict[str, Any]:
-    """
-    Gets a dictionary of query parameters from the request.
+    """Gets a dictionary of query parameters from the request.
 
-    'request.query' is a MultiMapping[str], needs to be converted to a dictionary to be validated by Pydantic.
+    request.query is a MultiMapping[str], needs to be converted to a dict
+    to be validated by Pydantic.
     """
     query_dict = {
         key: request.query.getall(key)
@@ -58,7 +58,8 @@ def get_query_dict(request: web.Request) -> dict[str, Any]:
 
 
 # Note to any custom node developers reading this code:
-# The assets system is not yet fully implemented, do not rely on the code in /app/assets remaining the same.
+# The assets system is not yet fully implemented,
+# do not rely on the code in /app/assets remaining the same.
 
 
 def register_assets_system(
@@ -80,6 +81,7 @@ def _build_error_response(
 
 def _build_validation_error_response(code: str, ve: ValidationError) -> web.Response:
+    import json
 
     errors = json.loads(ve.json())
     return _build_error_response(400, code, "Validation failed.", {"errors": errors})
 
@@ -142,15 +144,15 @@ async def list_assets_route(request: web.Request) -> web.Response:
 
     summaries = [
         schemas_out.AssetSummary(
-            id=item.info.id,
-            name=item.info.name,
+            id=item.ref.id,
+            name=item.ref.name,
             asset_hash=item.asset.hash if item.asset else None,
             size=int(item.asset.size_bytes) if item.asset else None,
             mime_type=item.asset.mime_type if item.asset else None,
             tags=item.tags,
-            created_at=item.info.created_at,
-            updated_at=item.info.updated_at,
-            last_access_time=item.info.last_access_time,
+            created_at=item.ref.created_at,
+            updated_at=item.ref.updated_at,
+            last_access_time=item.ref.last_access_time,
         )
         for item in result.items
     ]
@@ -168,40 +170,40 @@ async def get_asset_route(request: web.Request) -> web.Response:
     """
     GET request to get an asset's info as JSON.
     """
-    asset_info_id = str(uuid.UUID(request.match_info["id"]))
+    reference_id = str(uuid.UUID(request.match_info["id"]))
     try:
         result = get_asset_detail(
-            asset_info_id=asset_info_id,
+            reference_id=reference_id,
             owner_id=USER_MANAGER.get_request_user_id(request),
         )
         if not result:
             return _build_error_response(
                 404,
                 "ASSET_NOT_FOUND",
-                f"AssetInfo {asset_info_id} not found",
-                {"id": asset_info_id},
+                f"AssetReference {reference_id} not found",
+                {"id": reference_id},
             )
 
         payload = schemas_out.AssetDetail(
-            id=result.info.id,
-            name=result.info.name,
+            id=result.ref.id,
+            name=result.ref.name,
            asset_hash=result.asset.hash if result.asset else None,
            size=int(result.asset.size_bytes) if result.asset else None,
            mime_type=result.asset.mime_type if result.asset else None,
            tags=result.tags,
-            user_metadata=result.info.user_metadata or {},
-            preview_id=result.info.preview_id,
-            created_at=result.info.created_at,
-            last_access_time=result.info.last_access_time,
+            user_metadata=result.ref.user_metadata or {},
+            preview_id=result.ref.preview_id,
+            created_at=result.ref.created_at,
+            last_access_time=result.ref.last_access_time,
         )
     except ValueError as e:
         return _build_error_response(
-            404, "ASSET_NOT_FOUND", str(e), {"id": asset_info_id}
+            404, "ASSET_NOT_FOUND", str(e), {"id": reference_id}
         )
     except Exception:
         logging.exception(
-            "get_asset failed for asset_info_id=%s, owner_id=%s",
-            asset_info_id,
+            "get_asset failed for reference_id=%s, owner_id=%s",
+            reference_id,
             USER_MANAGER.get_request_user_id(request),
         )
         return _build_error_response(500, "INTERNAL", "Unexpected server error.")
@@ -216,7 +218,7 @@ async def download_asset_content(request: web.Request) -> web.Response:
 
     try:
         result = resolve_asset_for_download(
-            asset_info_id=str(uuid.UUID(request.match_info["id"])),
+            reference_id=str(uuid.UUID(request.match_info["id"])),
             owner_id=USER_MANAGER.get_request_user_id(request),
         )
         abs_path = result.abs_path
@@ -232,16 +234,14 @@ async def download_asset_content(request: web.Request) -> web.Response:
     )
 
     quoted = (filename or "").replace("\r", "").replace("\n", "").replace('"', "'")
-    cd = f"{disposition}; filename=\"{quoted}\"; filename*=UTF-8''{urllib.parse.quote(quoted)}"
+    encoded = urllib.parse.quote(quoted)
+    cd = f"{disposition}; filename=\"{quoted}\"; filename*=UTF-8''{encoded}"
 
     file_size = os.path.getsize(abs_path)
+    size_mb = file_size / (1024 * 1024)
     logging.info(
-        "download_asset_content: path=%s, size=%d bytes (%.2f MB), content_type=%s, filename=%s",
-        abs_path,
-        file_size,
-        file_size / (1024 * 1024),
-        content_type,
-        filename,
+        "download_asset_content: path=%s, size=%d bytes (%.2f MB), type=%s, name=%s",
+        abs_path, file_size, size_mb, content_type, filename,
     )
 
     async def stream_file_chunks():
@@ -288,16 +288,16 @@ async def create_asset_from_hash_route(request: web.Request) -> web.Response:
     )
 
     payload_out = schemas_out.AssetCreated(
-        id=result.info.id,
-        name=result.info.name,
+        id=result.ref.id,
+        name=result.ref.name,
         asset_hash=result.asset.hash,
         size=int(result.asset.size_bytes),
         mime_type=result.asset.mime_type,
         tags=result.tags,
-        user_metadata=result.info.user_metadata or {},
-        preview_id=result.info.preview_id,
-        created_at=result.info.created_at,
-        last_access_time=result.info.last_access_time,
+        user_metadata=result.ref.user_metadata or {},
+        preview_id=result.ref.preview_id,
+        created_at=result.ref.created_at,
+        last_access_time=result.ref.last_access_time,
         created_new=result.created_new,
     )
     return web.json_response(payload_out.model_dump(mode="json"), status=201)
@@ -340,7 +340,7 @@ async def upload_asset(request: web.Request) -> web.Response:
     )
 
     try:
-        # Fast path: if a valid provided hash exists, create AssetInfo without writing anything
+        # Fast path: hash exists, create AssetReference without writing anything
         if spec.hash and parsed.provided_hash_exists is True:
             result = create_from_hash(
                 hash_str=spec.hash,
@@ -391,16 +391,16 @@ async def upload_asset(request: web.Request) -> web.Response:
         return _build_error_response(500, "INTERNAL", "Unexpected server error.")
 
     payload = schemas_out.AssetCreated(
-        id=result.info.id,
-        name=result.info.name,
+        id=result.ref.id,
+        name=result.ref.name,
         asset_hash=result.asset.hash,
         size=int(result.asset.size_bytes),
         mime_type=result.asset.mime_type,
         tags=result.tags,
-        user_metadata=result.info.user_metadata or {},
-        preview_id=result.info.preview_id,
-        created_at=result.info.created_at,
-        last_access_time=result.info.last_access_time,
+        user_metadata=result.ref.user_metadata or {},
+        preview_id=result.ref.preview_id,
+        created_at=result.ref.created_at,
+        last_access_time=result.ref.last_access_time,
         created_new=result.created_new,
     )
     status = 201 if result.created_new else 200
@@ -409,7 +409,7 @@ async def upload_asset(request: web.Request) -> web.Response:
 
 @ROUTES.put(f"/api/assets/{{id:{UUID_RE}}}")
 async def update_asset_route(request: web.Request) -> web.Response:
-    asset_info_id = str(uuid.UUID(request.match_info["id"]))
+    reference_id = str(uuid.UUID(request.match_info["id"]))
     try:
         body = schemas_in.UpdateAssetBody.model_validate(await request.json())
     except ValidationError as ve:
@@ -421,27 +421,27 @@ async def update_asset_route(request: web.Request) -> web.Response:
 
     try:
         result = update_asset_metadata(
-            asset_info_id=asset_info_id,
+            reference_id=reference_id,
             name=body.name,
             user_metadata=body.user_metadata,
             owner_id=USER_MANAGER.get_request_user_id(request),
         )
         payload = schemas_out.AssetUpdated(
-            id=result.info.id,
-            name=result.info.name,
+            id=result.ref.id,
+            name=result.ref.name,
             asset_hash=result.asset.hash if result.asset else None,
             tags=result.tags,
-            user_metadata=result.info.user_metadata or {},
-            updated_at=result.info.updated_at,
+            user_metadata=result.ref.user_metadata or {},
+            updated_at=result.ref.updated_at,
         )
     except (ValueError, PermissionError) as ve:
         return _build_error_response(
-            404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id}
+            404, "ASSET_NOT_FOUND", str(ve), {"id": reference_id}
        )
     except Exception:
         logging.exception(
-            "update_asset failed for asset_info_id=%s, owner_id=%s",
-            asset_info_id,
+            "update_asset failed for reference_id=%s, owner_id=%s",
+            reference_id,
             USER_MANAGER.get_request_user_id(request),
         )
         return _build_error_response(500, "INTERNAL", "Unexpected server error.")
@@ -450,7 +450,7 @@ async def update_asset_route(request: web.Request) -> web.Response:
 
 @ROUTES.delete(f"/api/assets/{{id:{UUID_RE}}}")
 async def delete_asset_route(request: web.Request) -> web.Response:
-    asset_info_id = str(uuid.UUID(request.match_info["id"]))
+    reference_id = str(uuid.UUID(request.match_info["id"]))
     delete_content_param = request.query.get("delete_content")
     delete_content = (
         True
@@ -460,21 +460,21 @@ async def delete_asset_route(request: web.Request) -> web.Response:
 
     try:
         deleted = delete_asset_reference(
-            asset_info_id=asset_info_id,
+            reference_id=reference_id,
             owner_id=USER_MANAGER.get_request_user_id(request),
             delete_content_if_orphan=delete_content,
         )
     except Exception:
         logging.exception(
-            "delete_asset_reference failed for asset_info_id=%s, owner_id=%s",
-            asset_info_id,
+            "delete_asset_reference failed for reference_id=%s, owner_id=%s",
+            reference_id,
             USER_MANAGER.get_request_user_id(request),
         )
         return _build_error_response(500, "INTERNAL", "Unexpected server error.")
 
     if not deleted:
         return _build_error_response(
-            404, "ASSET_NOT_FOUND", f"AssetInfo {asset_info_id} not found."
+            404, "ASSET_NOT_FOUND", f"AssetReference {reference_id} not found."
         )
     return web.Response(status=204)
 
@@ -490,8 +490,12 @@ async def get_tags(request: web.Request) -> web.Response:
         query = schemas_in.TagsListQuery.model_validate(query_map)
     except ValidationError as e:
         import json
 
         return _build_error_response(
-            400, "INVALID_QUERY", "Invalid query parameters", {"errors": json.loads(e.json())}
+            400,
+            "INVALID_QUERY",
+            "Invalid query parameters",
+            {"errors": json.loads(e.json())},
         )
 
     rows, total = list_tags(
@@ -515,7 +519,7 @@ async def get_tags(request: web.Request) -> web.Response:
 
 @ROUTES.post(f"/api/assets/{{id:{UUID_RE}}}/tags")
 async def add_asset_tags(request: web.Request) -> web.Response:
-    asset_info_id = str(uuid.UUID(request.match_info["id"]))
+    reference_id = str(uuid.UUID(request.match_info["id"]))
     try:
         json_payload = await request.json()
         data = schemas_in.TagsAdd.model_validate(json_payload)
@@ -533,7 +537,7 @@ async def add_asset_tags(request: web.Request) -> web.Response:
 
     try:
         result = apply_tags(
-            asset_info_id=asset_info_id,
+            reference_id=reference_id,
             tags=data.tags,
             origin="manual",
             owner_id=USER_MANAGER.get_request_user_id(request),
@@ -545,12 +549,12 @@ async def add_asset_tags(request: web.Request) -> web.Response:
         )
     except (ValueError, PermissionError) as ve:
         return _build_error_response(
-            404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id}
+            404, "ASSET_NOT_FOUND", str(ve), {"id": reference_id}
         )
     except Exception:
         logging.exception(
-            "add_tags_to_asset failed for asset_info_id=%s, owner_id=%s",
-            asset_info_id,
+            "add_tags_to_asset failed for reference_id=%s, owner_id=%s",
+            reference_id,
             USER_MANAGER.get_request_user_id(request),
         )
         return _build_error_response(500, "INTERNAL", "Unexpected server error.")
@@ -560,7 +564,7 @@ async def add_asset_tags(request: web.Request) -> web.Response:
 
 @ROUTES.delete(f"/api/assets/{{id:{UUID_RE}}}/tags")
 async def delete_asset_tags(request: web.Request) -> web.Response:
-    asset_info_id = str(uuid.UUID(request.match_info["id"]))
+    reference_id = str(uuid.UUID(request.match_info["id"]))
     try:
         json_payload = await request.json()
         data = schemas_in.TagsRemove.model_validate(json_payload)
@@ -578,7 +582,7 @@ async def delete_asset_tags(request: web.Request) -> web.Response:
 
     try:
         result = remove_tags(
-            asset_info_id=asset_info_id,
+            reference_id=reference_id,
             tags=data.tags,
             owner_id=USER_MANAGER.get_request_user_id(request),
         )
@@ -589,12 +593,12 @@ async def delete_asset_tags(request: web.Request) -> web.Response:
         )
     except ValueError as ve:
         return _build_error_response(
-            404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id}
+            404, "ASSET_NOT_FOUND", str(ve), {"id": reference_id}
        )
     except Exception:
         logging.exception(
-            "remove_tags_from_asset failed for asset_info_id=%s, owner_id=%s",
-            asset_info_id,
+            "remove_tags_from_asset failed for reference_id=%s, owner_id=%s",
+            reference_id,
             USER_MANAGER.get_request_user_id(request),
        )
        return _build_error_response(500, "INTERNAL", "Unexpected server error.")
@@ -683,11 +687,11 @@ async def cancel_seed(request: web.Request) -> web.Response:
 
 @ROUTES.post("/api/assets/prune")
 async def mark_missing_assets(request: web.Request) -> web.Response:
-    """Mark assets as missing when their cache states point to files outside all known root prefixes.
+    """Mark assets as missing when outside all known root prefixes.
 
-    This is a non-destructive soft-delete operation. Assets and their metadata
-    are preserved, but cache states are flagged as missing. They can be restored
-    if the file reappears in a future scan.
+    This is a non-destructive soft-delete operation. Assets and metadata
+    are preserved, but references are flagged as missing. They can be
+    restored if the file reappears in a future scan.
 
     Returns:
         200 OK with count of marked assets
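Since only the identifier naming changed (asset_info_id becomes reference_id) and the route shapes are otherwise intact, existing clients keep calling the same endpoints. A hedged aiohttp sketch of fetching one asset detail (host, port, and ID are placeholders):

import asyncio

import aiohttp

async def fetch_asset_detail(ref_id: str) -> dict:
    # GET /api/assets/{id}; the id now resolves against asset_references,
    # but the JSON payload (id, name, asset_hash, tags, ...) is unchanged.
    async with aiohttp.ClientSession() as session:
        async with session.get(f"http://127.0.0.1:8188/api/assets/{ref_id}") as resp:
            resp.raise_for_status()
            return await resp.json()

# asyncio.run(fetch_asset_detail("00000000-0000-0000-0000-000000000000"))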
@@ -13,7 +13,7 @@ from pydantic import (
 
 
 class UploadError(Exception):
-    """Error during upload parsing with HTTP status and code (used in HTTP layer only)."""
+    """Error during upload parsing with HTTP status and code."""
 
     def __init__(self, status: int, code: str, message: str):
         super().__init__(message)
@@ -216,14 +216,14 @@ class TagsRemove(TagsAdd):
 
 class UploadAssetSpec(BaseModel):
     """Upload Asset operation.
 
     - tags: ordered; first is root ('models'|'input'|'output');
-      if root == 'models', second must be a valid category from folder_paths.folder_names_and_paths
+      if root == 'models', second must be a valid category
     - name: display name
     - user_metadata: arbitrary JSON object (optional)
-    - hash: optional canonical 'blake3:<hex>' provided by the client for validation / fast-path
+    - hash: optional canonical 'blake3:<hex>' for validation / fast-path
 
-    Files created via this endpoint are stored on disk using the **content hash** as the filename stem
-    and the original extension is preserved when available.
+    Files are stored using the content hash as filename stem.
     """
 
     model_config = ConfigDict(extra="ignore", str_strip_whitespace=True)
@@ -95,7 +95,7 @@ async def parse_multipart_upload(
     file_client_name = (field.filename or "").strip()
 
     if provided_hash and provided_hash_exists is True:
-        # If client supplied a hash that we know exists, drain but do not write to disk
+        # Hash exists - drain file but don't write to disk
         try:
             while True:
                 chunk = await field.read_chunk(8 * 1024 * 1024)
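To make the UploadAssetSpec docstring concrete, a request body following those rules might look like this (all values invented; 'checkpoints' stands in for any category present in folder_paths.folder_names_and_paths):

# Hypothetical upload spec: first tag is the root, second the models category.
spec = {
    "name": "sd15-pruned.safetensors",
    "tags": ["models", "checkpoints"],
    "user_metadata": {"source": "local-import"},
    # Optional canonical hash; when the server already has this content,
    # the upload is drained and nothing new is written to disk.
    "hash": "blake3:" + "ab" * 32,
}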
@@ -16,7 +16,6 @@ from sqlalchemy import (
     Numeric,
     String,
     Text,
-    UniqueConstraint,
 )
 from sqlalchemy.orm import Mapped, foreign, mapped_column, relationship
 
@@ -37,29 +36,23 @@ class Asset(Base):
         DateTime(timezone=False), nullable=False, default=get_utc_now
     )
 
-    infos: Mapped[list[AssetInfo]] = relationship(
-        "AssetInfo",
+    references: Mapped[list[AssetReference]] = relationship(
+        "AssetReference",
         back_populates="asset",
-        primaryjoin=lambda: Asset.id == foreign(AssetInfo.asset_id),
-        foreign_keys=lambda: [AssetInfo.asset_id],
+        primaryjoin=lambda: Asset.id == foreign(AssetReference.asset_id),
+        foreign_keys=lambda: [AssetReference.asset_id],
         cascade="all,delete-orphan",
         passive_deletes=True,
     )
 
-    preview_of: Mapped[list[AssetInfo]] = relationship(
-        "AssetInfo",
+    preview_of: Mapped[list[AssetReference]] = relationship(
+        "AssetReference",
         back_populates="preview_asset",
-        primaryjoin=lambda: Asset.id == foreign(AssetInfo.preview_id),
-        foreign_keys=lambda: [AssetInfo.preview_id],
+        primaryjoin=lambda: Asset.id == foreign(AssetReference.preview_id),
+        foreign_keys=lambda: [AssetReference.preview_id],
         viewonly=True,
     )
 
-    cache_states: Mapped[list[AssetCacheState]] = relationship(
-        back_populates="asset",
-        cascade="all, delete-orphan",
-        passive_deletes=True,
-    )
-
     __table_args__ = (
         Index("uq_assets_hash", "hash", unique=True),
         Index("ix_assets_mime_type", "mime_type"),
@@ -73,54 +66,33 @@ class Asset(Base):
         return f"<Asset id={self.id} hash={(self.hash or '')[:12]}>"
 
 
-class AssetCacheState(Base):
-    __tablename__ = "asset_cache_state"
+class AssetReference(Base):
+    """Unified model combining file cache state and user-facing metadata.
 
-    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    Each row represents either:
+    - A filesystem reference (file_path is set) with cache state
+    - An API-created reference (file_path is NULL) without cache state
+    """
+
+    __tablename__ = "asset_references"
+
+    id: Mapped[str] = mapped_column(
+        String(36), primary_key=True, default=lambda: str(uuid.uuid4())
+    )
     asset_id: Mapped[str] = mapped_column(
         String(36), ForeignKey("assets.id", ondelete="CASCADE"), nullable=False
     )
-    file_path: Mapped[str] = mapped_column(Text, nullable=False)
+
+    # Cache state fields (from former AssetCacheState)
+    file_path: Mapped[str | None] = mapped_column(Text, nullable=True)
     mtime_ns: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
     needs_verify: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
     is_missing: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
     enrichment_level: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
 
-    asset: Mapped[Asset] = relationship(back_populates="cache_states")
-
-    __table_args__ = (
-        Index("ix_asset_cache_state_file_path", "file_path"),
-        Index("ix_asset_cache_state_asset_id", "asset_id"),
-        Index("ix_asset_cache_state_is_missing", "is_missing"),
-        Index("ix_asset_cache_state_enrichment_level", "enrichment_level"),
-        CheckConstraint(
-            "(mtime_ns IS NULL) OR (mtime_ns >= 0)", name="ck_acs_mtime_nonneg"
-        ),
-        CheckConstraint(
-            "enrichment_level >= 0 AND enrichment_level <= 2",
-            name="ck_acs_enrichment_level_range",
-        ),
-        UniqueConstraint("file_path", name="uq_asset_cache_state_file_path"),
-    )
-
-    def to_dict(self, include_none: bool = False) -> dict[str, Any]:
-        return to_dict(self, include_none=include_none)
-
-    def __repr__(self) -> str:
-        return f"<AssetCacheState id={self.id} asset_id={self.asset_id} path={self.file_path!r}>"
-
-
-class AssetInfo(Base):
-    __tablename__ = "assets_info"
-
-    id: Mapped[str] = mapped_column(
-        String(36), primary_key=True, default=lambda: str(uuid.uuid4())
-    )
+    # Info fields (from former AssetInfo)
     owner_id: Mapped[str] = mapped_column(String(128), nullable=False, default="")
     name: Mapped[str] = mapped_column(String(512), nullable=False)
-    asset_id: Mapped[str] = mapped_column(
-        String(36), ForeignKey("assets.id", ondelete="RESTRICT"), nullable=False
-    )
     preview_id: Mapped[str | None] = mapped_column(
         String(36), ForeignKey("assets.id", ondelete="SET NULL")
     )
@@ -139,7 +111,7 @@ class AssetInfo(Base):
 
     asset: Mapped[Asset] = relationship(
         "Asset",
-        back_populates="infos",
+        back_populates="references",
         foreign_keys=[asset_id],
         lazy="selectin",
     )
@@ -149,37 +121,44 @@ class AssetInfo(Base):
         foreign_keys=[preview_id],
     )
 
-    metadata_entries: Mapped[list[AssetInfoMeta]] = relationship(
-        back_populates="asset_info",
+    metadata_entries: Mapped[list[AssetReferenceMeta]] = relationship(
+        back_populates="asset_reference",
         cascade="all,delete-orphan",
         passive_deletes=True,
     )
 
-    tag_links: Mapped[list[AssetInfoTag]] = relationship(
-        back_populates="asset_info",
+    tag_links: Mapped[list[AssetReferenceTag]] = relationship(
+        back_populates="asset_reference",
         cascade="all,delete-orphan",
         passive_deletes=True,
-        overlaps="tags,asset_infos",
+        overlaps="tags,asset_references",
     )
 
     tags: Mapped[list[Tag]] = relationship(
-        secondary="asset_info_tags",
-        back_populates="asset_infos",
+        secondary="asset_reference_tags",
+        back_populates="asset_references",
         lazy="selectin",
         viewonly=True,
-        overlaps="tag_links,asset_info_links,asset_infos,tag",
+        overlaps="tag_links,asset_reference_links,asset_references,tag",
     )
 
     __table_args__ = (
-        UniqueConstraint(
-            "asset_id", "owner_id", "name", name="uq_assets_info_asset_owner_name"
+        Index("uq_asset_references_file_path", "file_path", unique=True),
+        Index("ix_asset_references_asset_id", "asset_id"),
+        Index("ix_asset_references_owner_id", "owner_id"),
+        Index("ix_asset_references_name", "name"),
+        Index("ix_asset_references_is_missing", "is_missing"),
+        Index("ix_asset_references_enrichment_level", "enrichment_level"),
+        Index("ix_asset_references_created_at", "created_at"),
+        Index("ix_asset_references_last_access_time", "last_access_time"),
+        Index("ix_asset_references_owner_name", "owner_id", "name"),
+        CheckConstraint(
+            "(mtime_ns IS NULL) OR (mtime_ns >= 0)", name="ck_ar_mtime_nonneg"
        ),
+        CheckConstraint(
+            "enrichment_level >= 0 AND enrichment_level <= 2",
+            name="ck_ar_enrichment_level_range",
+        ),
-        Index("ix_assets_info_owner_name", "owner_id", "name"),
-        Index("ix_assets_info_owner_id", "owner_id"),
-        Index("ix_assets_info_asset_id", "asset_id"),
-        Index("ix_assets_info_name", "name"),
-        Index("ix_assets_info_created_at", "created_at"),
-        Index("ix_assets_info_last_access_time", "last_access_time"),
     )
 
     def to_dict(self, include_none: bool = False) -> dict[str, Any]:
@@ -188,14 +167,17 @@ class AssetInfo(Base):
         return data
 
     def __repr__(self) -> str:
-        return f"<AssetInfo id={self.id} name={self.name!r} asset_id={self.asset_id}>"
+        path_part = f" path={self.file_path!r}" if self.file_path else ""
+        return f"<AssetReference id={self.id} name={self.name!r}{path_part}>"
 
 
-class AssetInfoMeta(Base):
-    __tablename__ = "asset_info_meta"
+class AssetReferenceMeta(Base):
+    __tablename__ = "asset_reference_meta"
 
-    asset_info_id: Mapped[str] = mapped_column(
-        String(36), ForeignKey("assets_info.id", ondelete="CASCADE"), primary_key=True
+    asset_reference_id: Mapped[str] = mapped_column(
+        String(36),
+        ForeignKey("asset_references.id", ondelete="CASCADE"),
+        primary_key=True,
     )
     key: Mapped[str] = mapped_column(String(256), primary_key=True)
     ordinal: Mapped[int] = mapped_column(Integer, primary_key=True, default=0)
@@ -205,21 +187,25 @@ class AssetInfoMeta(Base):
     val_bool: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
     val_json: Mapped[Any | None] = mapped_column(JSON(none_as_null=True), nullable=True)
 
-    asset_info: Mapped[AssetInfo] = relationship(back_populates="metadata_entries")
+    asset_reference: Mapped[AssetReference] = relationship(
+        back_populates="metadata_entries"
+    )
 
     __table_args__ = (
-        Index("ix_asset_info_meta_key", "key"),
-        Index("ix_asset_info_meta_key_val_str", "key", "val_str"),
-        Index("ix_asset_info_meta_key_val_num", "key", "val_num"),
-        Index("ix_asset_info_meta_key_val_bool", "key", "val_bool"),
+        Index("ix_asset_reference_meta_key", "key"),
+        Index("ix_asset_reference_meta_key_val_str", "key", "val_str"),
+        Index("ix_asset_reference_meta_key_val_num", "key", "val_num"),
+        Index("ix_asset_reference_meta_key_val_bool", "key", "val_bool"),
    )
 
 
-class AssetInfoTag(Base):
-    __tablename__ = "asset_info_tags"
+class AssetReferenceTag(Base):
+    __tablename__ = "asset_reference_tags"
 
-    asset_info_id: Mapped[str] = mapped_column(
-        String(36), ForeignKey("assets_info.id", ondelete="CASCADE"), primary_key=True
+    asset_reference_id: Mapped[str] = mapped_column(
+        String(36),
+        ForeignKey("asset_references.id", ondelete="CASCADE"),
+        primary_key=True,
    )
     tag_name: Mapped[str] = mapped_column(
         String(512), ForeignKey("tags.name", ondelete="RESTRICT"), primary_key=True
@@ -229,12 +215,12 @@ class AssetInfoTag(Base):
         DateTime(timezone=False), nullable=False, default=get_utc_now
     )
 
-    asset_info: Mapped[AssetInfo] = relationship(back_populates="tag_links")
-    tag: Mapped[Tag] = relationship(back_populates="asset_info_links")
+    asset_reference: Mapped[AssetReference] = relationship(back_populates="tag_links")
+    tag: Mapped[Tag] = relationship(back_populates="asset_reference_links")
 
     __table_args__ = (
-        Index("ix_asset_info_tags_tag_name", "tag_name"),
-        Index("ix_asset_info_tags_asset_info_id", "asset_info_id"),
+        Index("ix_asset_reference_tags_tag_name", "tag_name"),
+        Index("ix_asset_reference_tags_asset_reference_id", "asset_reference_id"),
     )
 
 
@@ -244,15 +230,15 @@ class Tag(Base):
     name: Mapped[str] = mapped_column(String(512), primary_key=True)
     tag_type: Mapped[str] = mapped_column(String(32), nullable=False, default="user")
 
-    asset_info_links: Mapped[list[AssetInfoTag]] = relationship(
+    asset_reference_links: Mapped[list[AssetReferenceTag]] = relationship(
         back_populates="tag",
-        overlaps="asset_infos,tags",
+        overlaps="asset_references,tags",
     )
-    asset_infos: Mapped[list[AssetInfo]] = relationship(
-        secondary="asset_info_tags",
+    asset_references: Mapped[list[AssetReference]] = relationship(
+        secondary="asset_reference_tags",
         back_populates="tags",
         viewonly=True,
-        overlaps="asset_info_links,tag_links,tags,asset_info",
+        overlaps="asset_reference_links,tag_links,tags,asset_reference",
     )
 
     __table_args__ = (Index("ix_tags_tag_type", "tag_type"),)
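The unique index on the nullable file_path column leans on SQLite treating NULLs as distinct for UNIQUE purposes (the "works as expected" comment in migration 0005): any number of API-created references with file_path IS NULL can coexist, while concrete paths stay unique. A quick sqlite3 check of that behavior:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE r (id INTEGER PRIMARY KEY, file_path TEXT)")
conn.execute("CREATE UNIQUE INDEX uq_r_file_path ON r (file_path)")
# NULLs are distinct under UNIQUE: many path-less rows are allowed.
conn.execute("INSERT INTO r (file_path) VALUES (NULL)")
conn.execute("INSERT INTO r (file_path) VALUES (NULL)")
# Non-NULL paths are deduplicated as usual.
conn.execute("INSERT INTO r (file_path) VALUES ('/models/a.safetensors')")
try:
    conn.execute("INSERT INTO r (file_path) VALUES ('/models/a.safetensors')")
except sqlite3.IntegrityError:
    print("duplicate path rejected")
print(conn.execute("SELECT COUNT(*) FROM r").fetchone()[0])  # 3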
@@ -3,59 +3,60 @@ from app.assets.database.queries.asset import (
     bulk_insert_assets,
     get_asset_by_hash,
     get_existing_asset_ids,
+    reassign_asset_references,
+    update_asset_hash_and_mime,
     upsert_asset,
 )
-from app.assets.database.queries.asset_info import (
-    asset_info_exists_for_asset_id,
-    bulk_insert_asset_infos_ignore_conflicts,
-    delete_asset_info_by_id,
-    fetch_asset_info_and_asset,
-    fetch_asset_info_asset_and_tags,
-    get_asset_info_by_id,
-    get_asset_info_ids_by_ids,
-    get_or_create_asset_info,
-    insert_asset_info,
-    list_asset_infos_page,
-    set_asset_info_metadata,
-    set_asset_info_preview,
-    update_asset_info_access_time,
-    update_asset_info_name,
-    update_asset_info_timestamps,
-    update_asset_info_updated_at,
-)
-from app.assets.database.queries.cache_state import (
+from app.assets.database.queries.asset_reference import (
     CacheStateRow,
-    UnenrichedAssetRow,
-    bulk_insert_cache_states_ignore_conflicts,
+    UnenrichedReferenceRow,
+    bulk_insert_references_ignore_conflicts,
     bulk_update_enrichment_level,
     bulk_update_is_missing,
     bulk_update_needs_verify,
+    convert_metadata_to_rows,
     delete_assets_by_ids,
-    delete_cache_states_by_ids,
     delete_orphaned_seed_asset,
-    get_cache_states_by_paths_and_asset_ids,
-    get_cache_states_for_prefixes,
-    get_unenriched_cache_states,
+    delete_reference_by_id,
+    delete_references_by_ids,
+    fetch_reference_and_asset,
+    fetch_reference_asset_and_tags,
+    get_or_create_reference,
+    get_reference_by_file_path,
+    get_reference_by_id,
+    get_reference_ids_by_ids,
+    get_references_by_paths_and_asset_ids,
+    get_references_for_prefixes,
+    get_unenriched_references,
     get_unreferenced_unhashed_asset_ids,
-    list_cache_states_by_asset_id,
-    mark_cache_states_missing_outside_prefixes,
-    restore_cache_states_by_paths,
+    insert_reference,
+    list_references_by_asset_id,
+    list_references_page,
+    mark_references_missing_outside_prefixes,
+    reference_exists_for_asset_id,
+    restore_references_by_paths,
+    set_reference_metadata,
+    set_reference_preview,
     update_enrichment_level,
-    upsert_cache_state,
+    update_reference_access_time,
+    update_reference_name,
+    update_reference_timestamps,
+    update_reference_updated_at,
+    upsert_reference,
 )
 from app.assets.database.queries.tags import (
     AddTagsDict,
     RemoveTagsDict,
     SetTagsDict,
     add_missing_tag_for_asset_id,
-    add_tags_to_asset_info,
+    add_tags_to_reference,
     bulk_insert_tags_and_meta,
     ensure_tags_exist,
-    get_asset_tags,
+    get_reference_tags,
     list_tags_with_usage,
     remove_missing_tag_for_asset_id,
-    remove_tags_from_asset_info,
-    set_asset_info_tags,
+    remove_tags_from_reference,
+    set_reference_tags,
 )
 
 __all__ = [
@@ -63,51 +64,54 @@ __all__ = [
     "CacheStateRow",
     "RemoveTagsDict",
     "SetTagsDict",
-    "UnenrichedAssetRow",
+    "UnenrichedReferenceRow",
     "add_missing_tag_for_asset_id",
-    "add_tags_to_asset_info",
+    "add_tags_to_reference",
     "asset_exists_by_hash",
-    "asset_info_exists_for_asset_id",
-    "bulk_insert_asset_infos_ignore_conflicts",
     "bulk_insert_assets",
-    "bulk_insert_cache_states_ignore_conflicts",
+    "bulk_insert_references_ignore_conflicts",
     "bulk_insert_tags_and_meta",
     "bulk_update_enrichment_level",
     "bulk_update_is_missing",
     "bulk_update_needs_verify",
-    "delete_asset_info_by_id",
+    "convert_metadata_to_rows",
     "delete_assets_by_ids",
-    "delete_cache_states_by_ids",
     "delete_orphaned_seed_asset",
+    "delete_reference_by_id",
+    "delete_references_by_ids",
     "ensure_tags_exist",
-    "fetch_asset_info_and_asset",
-    "fetch_asset_info_asset_and_tags",
+    "fetch_reference_and_asset",
+    "fetch_reference_asset_and_tags",
     "get_asset_by_hash",
     "get_existing_asset_ids",
-    "get_asset_info_by_id",
-    "get_asset_info_ids_by_ids",
-    "get_asset_tags",
-    "get_cache_states_by_paths_and_asset_ids",
-    "get_cache_states_for_prefixes",
-    "get_or_create_asset_info",
-    "get_unenriched_cache_states",
+    "get_or_create_reference",
+    "get_reference_by_file_path",
+    "get_reference_by_id",
+    "get_reference_ids_by_ids",
+    "get_reference_tags",
+    "get_references_by_paths_and_asset_ids",
+    "get_references_for_prefixes",
+    "get_unenriched_references",
     "get_unreferenced_unhashed_asset_ids",
-    "insert_asset_info",
-    "list_asset_infos_page",
-    "list_cache_states_by_asset_id",
+    "insert_reference",
+    "list_references_by_asset_id",
+    "list_references_page",
     "list_tags_with_usage",
-    "mark_cache_states_missing_outside_prefixes",
+    "mark_references_missing_outside_prefixes",
+    "reassign_asset_references",
+    "reference_exists_for_asset_id",
     "remove_missing_tag_for_asset_id",
-    "remove_tags_from_asset_info",
-    "restore_cache_states_by_paths",
-    "set_asset_info_metadata",
-    "set_asset_info_preview",
-    "set_asset_info_tags",
-    "update_asset_info_access_time",
-    "update_asset_info_name",
-    "update_asset_info_timestamps",
-    "update_asset_info_updated_at",
+    "remove_tags_from_reference",
+    "restore_references_by_paths",
+    "set_reference_metadata",
+    "set_reference_preview",
+    "set_reference_tags",
+    "update_asset_hash_and_mime",
     "update_enrichment_level",
+    "update_reference_access_time",
+    "update_reference_name",
+    "update_reference_timestamps",
+    "update_reference_updated_at",
     "upsert_asset",
-    "upsert_cache_state",
+    "upsert_reference",
 ]
@@ -82,7 +82,7 @@ def bulk_insert_assets(
     session: Session,
     rows: list[dict],
 ) -> None:
-    """Bulk insert Asset rows. Each dict should have: id, hash, size_bytes, mime_type, created_at."""
+    """Bulk insert Asset rows with ON CONFLICT DO NOTHING on hash."""
     if not rows:
         return
     ins = sqlite.insert(Asset).on_conflict_do_nothing(index_elements=[Asset.hash])
@@ -101,3 +101,39 @@ def get_existing_asset_ids(
         select(Asset.id).where(Asset.id.in_(asset_ids))
     ).fetchall()
     return {row[0] for row in rows}
+
+
+def update_asset_hash_and_mime(
+    session: Session,
+    asset_id: str,
+    asset_hash: str | None = None,
+    mime_type: str | None = None,
+) -> bool:
+    """Update asset hash and/or mime_type. Returns True if asset was found."""
+    asset = session.get(Asset, asset_id)
+    if not asset:
+        return False
+    if asset_hash is not None:
+        asset.hash = asset_hash
+    if mime_type is not None:
+        asset.mime_type = mime_type
+    return True
+
+
+def reassign_asset_references(
+    session: Session,
+    from_asset_id: str,
+    to_asset_id: str,
+    reference_id: str,
+) -> None:
+    """Reassign a reference from one asset to another.
+
+    Used when merging a stub asset into an existing asset with the same hash.
+    """
+    from app.assets.database.models import AssetReference
+
+    ref = session.get(AssetReference, reference_id)
+    if ref:
+        ref.asset_id = to_asset_id
+
+    session.flush()
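A plausible call sequence for the new reassign helper, as a service might use it when enrichment hashes a stub asset and discovers an existing Asset row already owning that hash (the wrapper function below is hypothetical; only the imported helper is real):

from app.assets.database.queries.asset import reassign_asset_references

def adopt_existing_asset(session, stub_id: str, existing_id: str, ref_id: str) -> None:
    # Hypothetical flow: point the reference at the asset that already
    # owns the hash, leaving the stub unreferenced.
    reassign_asset_references(
        session,
        from_asset_id=stub_id,
        to_asset_id=existing_id,
        reference_id=ref_id,
    )
    # The now-orphaned stub can be removed later by cleanup queries such
    # as delete_orphaned_seed_asset.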
@@ -1,527 +0,0 @@
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from typing import Sequence
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import delete, exists, select
|
||||
from sqlalchemy.dialects import sqlite
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.orm import Session, contains_eager, noload
|
||||
|
||||
from app.assets.database.models import (
|
||||
Asset,
|
||||
AssetInfo,
|
||||
AssetInfoMeta,
|
||||
AssetInfoTag,
|
||||
Tag,
|
||||
)
|
||||
from app.assets.database.queries.common import (
|
||||
MAX_BIND_PARAMS,
|
||||
build_visible_owner_clause,
|
||||
calculate_rows_per_statement,
|
||||
iter_chunks,
|
||||
)
|
||||
from app.assets.helpers import escape_sql_like_string, get_utc_now, normalize_tags
|
||||
|
||||
|
||||
def _check_is_scalar(v):
|
||||
if v is None:
|
||||
return True
|
||||
if isinstance(v, bool):
|
||||
return True
|
||||
if isinstance(v, (int, float, Decimal, str)):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _scalar_to_row(key: str, ordinal: int, value) -> dict:
|
||||
"""Convert a scalar value to a typed projection row."""
|
||||
if value is None:
|
||||
return {
|
||||
"key": key,
|
||||
"ordinal": ordinal,
|
||||
"val_str": None,
|
||||
"val_num": None,
|
||||
"val_bool": None,
|
||||
"val_json": None,
|
||||
}
|
||||
if isinstance(value, bool):
|
||||
return {"key": key, "ordinal": ordinal, "val_bool": bool(value)}
|
||||
if isinstance(value, (int, float, Decimal)):
|
||||
num = value if isinstance(value, Decimal) else Decimal(str(value))
|
||||
return {"key": key, "ordinal": ordinal, "val_num": num}
|
||||
if isinstance(value, str):
|
||||
return {"key": key, "ordinal": ordinal, "val_str": value}
|
||||
return {"key": key, "ordinal": ordinal, "val_json": value}
|
||||
|
||||
|
||||
def convert_metadata_to_rows(key: str, value) -> list[dict]:
|
||||
"""
|
||||
Turn a metadata key/value into typed projection rows.
|
||||
Returns list[dict] with keys:
|
||||
key, ordinal, and one of val_str / val_num / val_bool / val_json (others None)
|
||||
"""
|
||||
if value is None:
|
||||
return [_scalar_to_row(key, 0, None)]
|
||||
|
||||
if _check_is_scalar(value):
|
||||
return [_scalar_to_row(key, 0, value)]
|
||||
|
||||
if isinstance(value, list):
|
||||
if all(_check_is_scalar(x) for x in value):
|
||||
return [_scalar_to_row(key, i, x) for i, x in enumerate(value)]
|
||||
return [{"key": key, "ordinal": i, "val_json": x} for i, x in enumerate(value)]
|
||||
|
||||
return [{"key": key, "ordinal": 0, "val_json": value}]


def _apply_tag_filters(
    stmt: sa.sql.Select,
    include_tags: Sequence[str] | None = None,
    exclude_tags: Sequence[str] | None = None,
) -> sa.sql.Select:
    """include_tags: every tag must be present; exclude_tags: none may be present."""
    include_tags = normalize_tags(include_tags)
    exclude_tags = normalize_tags(exclude_tags)

    if include_tags:
        for tag_name in include_tags:
            stmt = stmt.where(
                exists().where(
                    (AssetInfoTag.asset_info_id == AssetInfo.id)
                    & (AssetInfoTag.tag_name == tag_name)
                )
            )

    if exclude_tags:
        stmt = stmt.where(
            ~exists().where(
                (AssetInfoTag.asset_info_id == AssetInfo.id)
                & (AssetInfoTag.tag_name.in_(exclude_tags))
            )
        )
    return stmt


def _apply_metadata_filter(
    stmt: sa.sql.Select,
    metadata_filter: dict | None = None,
) -> sa.sql.Select:
    """Apply filters using asset_info_meta projection table."""
    if not metadata_filter:
        return stmt

    def _exists_for_pred(key: str, *preds) -> sa.sql.ClauseElement:
        return sa.exists().where(
            AssetInfoMeta.asset_info_id == AssetInfo.id,
            AssetInfoMeta.key == key,
            *preds,
        )

    def _exists_clause_for_value(key: str, value) -> sa.sql.ClauseElement:
        if value is None:
            no_row_for_key = sa.not_(
                sa.exists().where(
                    AssetInfoMeta.asset_info_id == AssetInfo.id,
                    AssetInfoMeta.key == key,
                )
            )
            null_row = _exists_for_pred(
                key,
                AssetInfoMeta.val_json.is_(None),
                AssetInfoMeta.val_str.is_(None),
                AssetInfoMeta.val_num.is_(None),
                AssetInfoMeta.val_bool.is_(None),
            )
            return sa.or_(no_row_for_key, null_row)

        if isinstance(value, bool):
            return _exists_for_pred(key, AssetInfoMeta.val_bool == bool(value))
        if isinstance(value, (int, float)):
            num = value if isinstance(value, Decimal) else Decimal(str(value))
            return _exists_for_pred(key, AssetInfoMeta.val_num == num)
        if isinstance(value, str):
            return _exists_for_pred(key, AssetInfoMeta.val_str == value)
        return _exists_for_pred(key, AssetInfoMeta.val_json == value)

    for k, v in metadata_filter.items():
        if isinstance(v, list):
            ors = [_exists_clause_for_value(k, elem) for elem in v]
            if ors:
                stmt = stmt.where(sa.or_(*ors))
        else:
            stmt = stmt.where(_exists_clause_for_value(k, v))
    return stmt
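
# Filter semantics in brief (hypothetical keys): every key must match, and a
# list value ORs its elements, so
#   {"base_model": ["sdxl", "sd15"], "nsfw": False}
# keeps rows whose base_model projects to "sdxl" or "sd15" AND whose nsfw
# flag is False; a value of None matches keys that are absent or stored null.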


def asset_info_exists_for_asset_id(
    session: Session,
    asset_id: str,
) -> bool:
    q = (
        select(sa.literal(True))
        .select_from(AssetInfo)
        .where(AssetInfo.asset_id == asset_id)
        .limit(1)
    )
    return (session.execute(q)).first() is not None


def get_asset_info_by_id(
    session: Session,
    asset_info_id: str,
) -> AssetInfo | None:
    return session.get(AssetInfo, asset_info_id)


def insert_asset_info(
    session: Session,
    asset_id: str,
    owner_id: str,
    name: str,
    preview_id: str | None = None,
) -> AssetInfo | None:
    """Insert a new AssetInfo. Returns None if unique constraint violated."""
    now = get_utc_now()
    try:
        with session.begin_nested():
            info = AssetInfo(
                owner_id=owner_id,
                name=name,
                asset_id=asset_id,
                preview_id=preview_id,
                created_at=now,
                updated_at=now,
                last_access_time=now,
            )
            session.add(info)
            session.flush()
            return info
    except IntegrityError:
        return None


def get_or_create_asset_info(
    session: Session,
    asset_id: str,
    owner_id: str,
    name: str,
    preview_id: str | None = None,
) -> tuple[AssetInfo, bool]:
    """Get existing or create new AssetInfo. Returns (info, created)."""
    info = insert_asset_info(
        session,
        asset_id=asset_id,
        owner_id=owner_id,
        name=name,
        preview_id=preview_id,
    )
    if info:
        return info, True

    existing = (
        session.execute(
            select(AssetInfo)
            .where(
                AssetInfo.asset_id == asset_id,
                AssetInfo.name == name,
                AssetInfo.owner_id == owner_id,
            )
            .limit(1)
        )
        .unique()
        .scalar_one_or_none()
    )
    if not existing:
        raise RuntimeError("Failed to find AssetInfo after insert conflict.")
    return existing, False
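
# Call-site sketch (assumed identifiers): insert first, then fall back to a
# SELECT if another writer won the race on the unique constraint. The nested
# SAVEPOINT inside insert_asset_info keeps the session usable after the
# IntegrityError.
#   info, created = get_or_create_asset_info(
#       session, asset_id=asset.id, owner_id="", name="model.safetensors"
#   )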


def update_asset_info_timestamps(
    session: Session,
    asset_info: AssetInfo,
    preview_id: str | None = None,
) -> None:
    """Update timestamps and optionally preview_id on existing AssetInfo."""
    now = get_utc_now()
    if preview_id and asset_info.preview_id != preview_id:
        asset_info.preview_id = preview_id
        asset_info.updated_at = now
    if asset_info.last_access_time < now:
        asset_info.last_access_time = now
    session.flush()


def list_asset_infos_page(
    session: Session,
    owner_id: str = "",
    include_tags: Sequence[str] | None = None,
    exclude_tags: Sequence[str] | None = None,
    name_contains: str | None = None,
    metadata_filter: dict | None = None,
    limit: int = 20,
    offset: int = 0,
    sort: str = "created_at",
    order: str = "desc",
) -> tuple[list[AssetInfo], dict[str, list[str]], int]:
    base = (
        select(AssetInfo)
        .join(Asset, Asset.id == AssetInfo.asset_id)
        .options(contains_eager(AssetInfo.asset), noload(AssetInfo.tags))
        .where(build_visible_owner_clause(owner_id))
    )

    if name_contains:
        escaped, esc = escape_sql_like_string(name_contains)
        base = base.where(AssetInfo.name.ilike(f"%{escaped}%", escape=esc))

    base = _apply_tag_filters(base, include_tags, exclude_tags)
    base = _apply_metadata_filter(base, metadata_filter)

    sort = (sort or "created_at").lower()
    order = (order or "desc").lower()
    sort_map = {
        "name": AssetInfo.name,
        "created_at": AssetInfo.created_at,
        "updated_at": AssetInfo.updated_at,
        "last_access_time": AssetInfo.last_access_time,
        "size": Asset.size_bytes,
    }
    sort_col = sort_map.get(sort, AssetInfo.created_at)
    sort_exp = sort_col.desc() if order == "desc" else sort_col.asc()

    base = base.order_by(sort_exp).limit(limit).offset(offset)

    count_stmt = (
        select(sa.func.count())
        .select_from(AssetInfo)
        .join(Asset, Asset.id == AssetInfo.asset_id)
        .where(build_visible_owner_clause(owner_id))
    )
    if name_contains:
        escaped, esc = escape_sql_like_string(name_contains)
        count_stmt = count_stmt.where(AssetInfo.name.ilike(f"%{escaped}%", escape=esc))
    count_stmt = _apply_tag_filters(count_stmt, include_tags, exclude_tags)
    count_stmt = _apply_metadata_filter(count_stmt, metadata_filter)

    total = int((session.execute(count_stmt)).scalar_one() or 0)

    infos = (session.execute(base)).unique().scalars().all()

    id_list: list[str] = [i.id for i in infos]
    tag_map: dict[str, list[str]] = defaultdict(list)
    if id_list:
        rows = session.execute(
            select(AssetInfoTag.asset_info_id, Tag.name)
            .join(Tag, Tag.name == AssetInfoTag.tag_name)
            .where(AssetInfoTag.asset_info_id.in_(id_list))
            .order_by(AssetInfoTag.added_at)
        )
        for aid, tag_name in rows.all():
            tag_map[aid].append(tag_name)

    return infos, tag_map, total
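
# Usage sketch (assumed tag names): second page of checkpoints, newest first,
# skipping anything tagged "missing".
#   infos, tag_map, total = list_asset_infos_page(
#       session,
#       include_tags=["checkpoint"],
#       exclude_tags=["missing"],
#       limit=20,
#       offset=20,
#   )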


def fetch_asset_info_asset_and_tags(
    session: Session,
    asset_info_id: str,
    owner_id: str = "",
) -> tuple[AssetInfo, Asset, list[str]] | None:
    stmt = (
        select(AssetInfo, Asset, Tag.name)
        .join(Asset, Asset.id == AssetInfo.asset_id)
        .join(AssetInfoTag, AssetInfoTag.asset_info_id == AssetInfo.id, isouter=True)
        .join(Tag, Tag.name == AssetInfoTag.tag_name, isouter=True)
        .where(
            AssetInfo.id == asset_info_id,
            build_visible_owner_clause(owner_id),
        )
        .options(noload(AssetInfo.tags))
        .order_by(Tag.name.asc())
    )

    rows = (session.execute(stmt)).all()
    if not rows:
        return None

    first_info, first_asset, _ = rows[0]
    tags: list[str] = []
    seen: set[str] = set()
    for _info, _asset, tag_name in rows:
        if tag_name and tag_name not in seen:
            seen.add(tag_name)
            tags.append(tag_name)
    return first_info, first_asset, tags


def fetch_asset_info_and_asset(
    session: Session,
    asset_info_id: str,
    owner_id: str = "",
) -> tuple[AssetInfo, Asset] | None:
    stmt = (
        select(AssetInfo, Asset)
        .join(Asset, Asset.id == AssetInfo.asset_id)
        .where(
            AssetInfo.id == asset_info_id,
            build_visible_owner_clause(owner_id),
        )
        .limit(1)
        .options(noload(AssetInfo.tags))
    )
    row = session.execute(stmt)
    pair = row.first()
    if not pair:
        return None
    return pair[0], pair[1]


def update_asset_info_access_time(
    session: Session,
    asset_info_id: str,
    ts: datetime | None = None,
    only_if_newer: bool = True,
) -> None:
    ts = ts or get_utc_now()
    stmt = sa.update(AssetInfo).where(AssetInfo.id == asset_info_id)
    if only_if_newer:
        stmt = stmt.where(
            sa.or_(
                AssetInfo.last_access_time.is_(None), AssetInfo.last_access_time < ts
            )
        )
    session.execute(stmt.values(last_access_time=ts))


def update_asset_info_name(
    session: Session,
    asset_info_id: str,
    name: str,
) -> None:
    """Update the name of an AssetInfo."""
    now = get_utc_now()
    session.execute(
        sa.update(AssetInfo)
        .where(AssetInfo.id == asset_info_id)
        .values(name=name, updated_at=now)
    )


def update_asset_info_updated_at(
    session: Session,
    asset_info_id: str,
    ts: datetime | None = None,
) -> None:
    """Update the updated_at timestamp of an AssetInfo."""
    ts = ts or get_utc_now()
    session.execute(
        sa.update(AssetInfo).where(AssetInfo.id == asset_info_id).values(updated_at=ts)
    )


def set_asset_info_metadata(
    session: Session,
    asset_info_id: str,
    user_metadata: dict | None = None,
) -> None:
    info = session.get(AssetInfo, asset_info_id)
    if not info:
        raise ValueError(f"AssetInfo {asset_info_id} not found")

    info.user_metadata = user_metadata or {}
    info.updated_at = get_utc_now()
    session.flush()

    session.execute(
        delete(AssetInfoMeta).where(AssetInfoMeta.asset_info_id == asset_info_id)
    )
    session.flush()

    if not user_metadata:
        return

    rows: list[AssetInfoMeta] = []
    for k, v in user_metadata.items():
        for r in convert_metadata_to_rows(k, v):
            rows.append(
                AssetInfoMeta(
                    asset_info_id=asset_info_id,
                    key=r["key"],
                    ordinal=int(r["ordinal"]),
                    val_str=r.get("val_str"),
                    val_num=r.get("val_num"),
                    val_bool=r.get("val_bool"),
                    val_json=r.get("val_json"),
                )
            )
    if rows:
        session.add_all(rows)
    session.flush()


def delete_asset_info_by_id(
    session: Session,
    asset_info_id: str,
    owner_id: str,
) -> bool:
    stmt = sa.delete(AssetInfo).where(
        AssetInfo.id == asset_info_id,
        build_visible_owner_clause(owner_id),
    )
    return int((session.execute(stmt)).rowcount or 0) > 0


def set_asset_info_preview(
    session: Session,
    asset_info_id: str,
    preview_asset_id: str | None = None,
) -> None:
    """Set or clear preview_id and bump updated_at. Raises on unknown IDs."""
    info = session.get(AssetInfo, asset_info_id)
    if not info:
        raise ValueError(f"AssetInfo {asset_info_id} not found")

    if preview_asset_id is None:
        info.preview_id = None
    else:
        if not session.get(Asset, preview_asset_id):
            raise ValueError(f"Preview Asset {preview_asset_id} not found")
        info.preview_id = preview_asset_id

    info.updated_at = get_utc_now()
    session.flush()


def bulk_insert_asset_infos_ignore_conflicts(
    session: Session,
    rows: list[dict],
) -> None:
    """Bulk insert AssetInfo rows with ON CONFLICT DO NOTHING.

    Each dict should have: id, owner_id, name, asset_id, preview_id,
    user_metadata, created_at, updated_at, last_access_time
    """
    if not rows:
        return
    ins = sqlite.insert(AssetInfo).on_conflict_do_nothing(
        index_elements=[AssetInfo.asset_id, AssetInfo.owner_id, AssetInfo.name]
    )
    for chunk in iter_chunks(rows, calculate_rows_per_statement(9)):
        session.execute(ins, chunk)


def get_asset_info_ids_by_ids(
    session: Session,
    info_ids: list[str],
) -> set[str]:
    """Query to find which AssetInfo IDs exist in the database."""
    if not info_ids:
        return set()

    found: set[str] = set()
    for chunk in iter_chunks(info_ids, MAX_BIND_PARAMS):
        result = session.execute(select(AssetInfo.id).where(AssetInfo.id.in_(chunk)))
        found.update(result.scalars().all())
    return found
1039	app/assets/database/queries/asset_reference.py	Normal file
File diff suppressed because it is too large
@@ -1,451 +0,0 @@
import os
from typing import NamedTuple, Sequence

import sqlalchemy as sa
from sqlalchemy import select
from sqlalchemy.dialects import sqlite
from sqlalchemy.orm import Session

from app.assets.database.models import Asset, AssetCacheState, AssetInfo
from app.assets.database.queries.common import (
    MAX_BIND_PARAMS,
    calculate_rows_per_statement,
    iter_chunks,
)
from app.assets.helpers import escape_sql_like_string


class CacheStateRow(NamedTuple):
    """Row from cache state query with joined asset data."""

    state_id: int
    file_path: str
    mtime_ns: int | None
    needs_verify: bool
    asset_id: str
    asset_hash: str | None
    size_bytes: int


def list_cache_states_by_asset_id(
    session: Session, *, asset_id: str
) -> Sequence[AssetCacheState]:
    return (
        (
            session.execute(
                select(AssetCacheState)
                .where(AssetCacheState.asset_id == asset_id)
                .order_by(AssetCacheState.id.asc())
            )
        )
        .scalars()
        .all()
    )


def upsert_cache_state(
    session: Session,
    asset_id: str,
    file_path: str,
    mtime_ns: int,
) -> tuple[bool, bool]:
    """Upsert a cache state by file_path. Returns (created, updated).

    Also restores cache states that were previously marked as missing.
    """
    vals = {
        "asset_id": asset_id,
        "file_path": file_path,
        "mtime_ns": int(mtime_ns),
        "is_missing": False,
    }
    ins = (
        sqlite.insert(AssetCacheState)
        .values(**vals)
        .on_conflict_do_nothing(index_elements=[AssetCacheState.file_path])
    )
    res = session.execute(ins)
    created = int(res.rowcount or 0) > 0

    if created:
        return True, False

    upd = (
        sa.update(AssetCacheState)
        .where(AssetCacheState.file_path == file_path)
        .where(
            sa.or_(
                AssetCacheState.asset_id != asset_id,
                AssetCacheState.mtime_ns.is_(None),
                AssetCacheState.mtime_ns != int(mtime_ns),
                AssetCacheState.is_missing == True,  # noqa: E712
            )
        )
        .values(asset_id=asset_id, mtime_ns=int(mtime_ns), is_missing=False)
    )
    res2 = session.execute(upd)
    updated = int(res2.rowcount or 0) > 0
    return False, updated
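
# Return-value sketch:
#   created, updated = upsert_cache_state(session, asset_id, file_path, mtime_ns)
#   (True, False)  -> new row inserted for this file_path
#   (False, True)  -> existing row repointed, refreshed, or un-marked missing
#   (False, False) -> row already matched this asset_id and mtime; no-op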


def mark_cache_states_missing_outside_prefixes(
    session: Session, valid_prefixes: list[str]
) -> int:
    """Mark cache states as missing when file_path doesn't match any valid prefix.

    This is a non-destructive soft-delete that preserves user metadata.
    Cache states can be restored if the file reappears in a future scan.

    Args:
        session: Database session
        valid_prefixes: List of absolute directory prefixes that are valid

    Returns:
        Number of cache states marked as missing
    """
    if not valid_prefixes:
        return 0

    def make_prefix_condition(prefix: str):
        base = prefix if prefix.endswith(os.sep) else prefix + os.sep
        escaped, esc = escape_sql_like_string(base)
        return AssetCacheState.file_path.like(escaped + "%", escape=esc)

    matches_valid_prefix = sa.or_(*[make_prefix_condition(p) for p in valid_prefixes])
    result = session.execute(
        sa.update(AssetCacheState)
        .where(~matches_valid_prefix)
        .where(AssetCacheState.is_missing == False)  # noqa: E712
        .values(is_missing=True)
    )
    return result.rowcount


def restore_cache_states_by_paths(session: Session, file_paths: list[str]) -> int:
    """Restore cache states that were previously marked as missing.

    Called when a file path is re-scanned and found to exist.

    Args:
        session: Database session
        file_paths: List of file paths that exist and should be restored

    Returns:
        Number of cache states restored
    """
    if not file_paths:
        return 0

    result = session.execute(
        sa.update(AssetCacheState)
        .where(AssetCacheState.file_path.in_(file_paths))
        .where(AssetCacheState.is_missing == True)  # noqa: E712
        .values(is_missing=False)
    )
    return result.rowcount


def get_unreferenced_unhashed_asset_ids(session: Session) -> list[str]:
    """Get IDs of unhashed assets (hash=None) with no active cache states.

    An asset is considered unreferenced if it has no cache states,
    or all its cache states are marked as missing.

    Returns:
        List of asset IDs that are unreferenced
    """
    active_cache_state_exists = (
        sa.select(sa.literal(1))
        .where(AssetCacheState.asset_id == Asset.id)
        .where(AssetCacheState.is_missing == False)  # noqa: E712
        .correlate(Asset)
        .exists()
    )
    unreferenced_subq = sa.select(Asset.id).where(
        Asset.hash.is_(None), ~active_cache_state_exists
    )
    return [row[0] for row in session.execute(unreferenced_subq).all()]


def delete_assets_by_ids(session: Session, asset_ids: list[str]) -> int:
    """Delete assets and their AssetInfos by ID.

    Args:
        session: Database session
        asset_ids: List of asset IDs to delete

    Returns:
        Number of assets deleted
    """
    if not asset_ids:
        return 0
    session.execute(sa.delete(AssetInfo).where(AssetInfo.asset_id.in_(asset_ids)))
    result = session.execute(sa.delete(Asset).where(Asset.id.in_(asset_ids)))
    return result.rowcount


def get_cache_states_for_prefixes(
    session: Session,
    prefixes: list[str],
    *,
    include_missing: bool = False,
) -> list[CacheStateRow]:
    """Get all cache states with paths matching any of the given prefixes.

    Args:
        session: Database session
        prefixes: List of absolute directory prefixes to match
        include_missing: If False (default), exclude cache states marked as missing

    Returns:
        List of cache state rows with joined asset data, ordered by asset_id, state_id
    """
    if not prefixes:
        return []

    conds = []
    for p in prefixes:
        base = os.path.abspath(p)
        if not base.endswith(os.sep):
            base += os.sep
        escaped, esc = escape_sql_like_string(base)
        conds.append(AssetCacheState.file_path.like(escaped + "%", escape=esc))

    query = (
        sa.select(
            AssetCacheState.id,
            AssetCacheState.file_path,
            AssetCacheState.mtime_ns,
            AssetCacheState.needs_verify,
            AssetCacheState.asset_id,
            Asset.hash,
            Asset.size_bytes,
        )
        .join(Asset, Asset.id == AssetCacheState.asset_id)
        .where(sa.or_(*conds))
    )

    if not include_missing:
        query = query.where(AssetCacheState.is_missing == False)  # noqa: E712

    rows = session.execute(
        query.order_by(AssetCacheState.asset_id.asc(), AssetCacheState.id.asc())
    ).all()

    return [
        CacheStateRow(
            state_id=row[0],
            file_path=row[1],
            mtime_ns=row[2],
            needs_verify=row[3],
            asset_id=row[4],
            asset_hash=row[5],
            size_bytes=int(row[6] or 0),
        )
        for row in rows
    ]


def bulk_update_needs_verify(session: Session, state_ids: list[int], value: bool) -> int:
    """Set needs_verify flag for multiple cache states.

    Returns: Number of rows updated
    """
    if not state_ids:
        return 0
    result = session.execute(
        sa.update(AssetCacheState)
        .where(AssetCacheState.id.in_(state_ids))
        .values(needs_verify=value)
    )
    return result.rowcount


def bulk_update_is_missing(session: Session, state_ids: list[int], value: bool) -> int:
    """Set is_missing flag for multiple cache states.

    Returns: Number of rows updated
    """
    if not state_ids:
        return 0
    result = session.execute(
        sa.update(AssetCacheState)
        .where(AssetCacheState.id.in_(state_ids))
        .values(is_missing=value)
    )
    return result.rowcount


def delete_cache_states_by_ids(session: Session, state_ids: list[int]) -> int:
    """Delete cache states by their IDs.

    Returns: Number of rows deleted
    """
    if not state_ids:
        return 0
    result = session.execute(
        sa.delete(AssetCacheState).where(AssetCacheState.id.in_(state_ids))
    )
    return result.rowcount


def delete_orphaned_seed_asset(session: Session, asset_id: str) -> bool:
    """Delete a seed asset (hash is None) and its AssetInfos.

    Returns: True if asset was deleted, False if not found
    """
    session.execute(sa.delete(AssetInfo).where(AssetInfo.asset_id == asset_id))
    asset = session.get(Asset, asset_id)
    if asset:
        session.delete(asset)
        return True
    return False


class UnenrichedAssetRow(NamedTuple):
    """Row for assets needing enrichment."""

    cache_state_id: int
    asset_id: str
    asset_info_id: str
    file_path: str
    enrichment_level: int


def get_unenriched_cache_states(
    session: Session,
    prefixes: list[str],
    max_level: int = 0,
    limit: int = 1000,
) -> list[UnenrichedAssetRow]:
    """Get cache states that need enrichment (enrichment_level <= max_level).

    Args:
        session: Database session
        prefixes: List of absolute directory prefixes to scan
        max_level: Maximum enrichment level to include (0=stubs, 1=metadata done)
        limit: Maximum number of rows to return

    Returns:
        List of unenriched asset rows with file paths
    """
    if not prefixes:
        return []

    conds = []
    for p in prefixes:
        base = os.path.abspath(p)
        if not base.endswith(os.sep):
            base += os.sep
        escaped, esc = escape_sql_like_string(base)
        conds.append(AssetCacheState.file_path.like(escaped + "%", escape=esc))

    query = (
        sa.select(
            AssetCacheState.id,
            AssetCacheState.asset_id,
            AssetInfo.id,
            AssetCacheState.file_path,
            AssetCacheState.enrichment_level,
        )
        .join(Asset, Asset.id == AssetCacheState.asset_id)
        .join(AssetInfo, AssetInfo.asset_id == Asset.id)
        .where(sa.or_(*conds))
        .where(AssetCacheState.is_missing == False)  # noqa: E712
        .where(AssetCacheState.enrichment_level <= max_level)
        .order_by(AssetCacheState.id.asc())
        .limit(limit)
    )

    rows = session.execute(query).all()
    return [
        UnenrichedAssetRow(
            cache_state_id=row[0],
            asset_id=row[1],
            asset_info_id=row[2],
            file_path=row[3],
            enrichment_level=row[4],
        )
        for row in rows
    ]


def update_enrichment_level(
    session: Session,
    cache_state_id: int,
    level: int,
) -> None:
    """Update the enrichment level for a cache state."""
    session.execute(
        sa.update(AssetCacheState)
        .where(AssetCacheState.id == cache_state_id)
        .values(enrichment_level=level)
    )


def bulk_update_enrichment_level(
    session: Session,
    cache_state_ids: list[int],
    level: int,
) -> int:
    """Update enrichment level for multiple cache states.

    Returns: Number of rows updated
    """
    if not cache_state_ids:
        return 0
    result = session.execute(
        sa.update(AssetCacheState)
        .where(AssetCacheState.id.in_(cache_state_ids))
        .values(enrichment_level=level)
    )
    return result.rowcount


def bulk_insert_cache_states_ignore_conflicts(
    session: Session,
    rows: list[dict],
) -> None:
    """Bulk insert cache state rows with ON CONFLICT DO NOTHING on file_path.

    Each dict should have: asset_id, file_path, mtime_ns
    The is_missing field is automatically set to False for new inserts.
    """
    if not rows:
        return
    enriched_rows = [{**row, "is_missing": False} for row in rows]
    ins = sqlite.insert(AssetCacheState).on_conflict_do_nothing(
        index_elements=[AssetCacheState.file_path]
    )
    for chunk in iter_chunks(enriched_rows, calculate_rows_per_statement(4)):
        session.execute(ins, chunk)


def get_cache_states_by_paths_and_asset_ids(
    session: Session,
    path_to_asset: dict[str, str],
) -> set[str]:
    """Query cache states to find paths where our asset_id won the insert.

    Args:
        path_to_asset: Mapping of file_path -> asset_id we tried to insert

    Returns:
        Set of file_paths where our asset_id is present
    """
    if not path_to_asset:
        return set()

    paths = list(path_to_asset.keys())
    winners: set[str] = set()

    for chunk in iter_chunks(paths, MAX_BIND_PARAMS):
        result = session.execute(
            select(AssetCacheState.file_path).where(
                AssetCacheState.file_path.in_(chunk),
                AssetCacheState.asset_id.in_([path_to_asset[p] for p in chunk]),
            )
        )
        winners.update(result.scalars().all())

    return winners
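The bulk-insert/winner-check pairing above is easiest to see end to end. A minimal sketch with a made-up path and placeholder mtime; only the paths whose row still carries our asset_id after ON CONFLICT DO NOTHING were actually inserted by us:

    inserted = {"/models/checkpoints/a.safetensors": new_asset_id}
    bulk_insert_cache_states_ignore_conflicts(session, [
        {"asset_id": aid, "file_path": fp, "mtime_ns": 0}
        for fp, aid in inserted.items()
    ])
    winners = get_cache_states_by_paths_and_asset_ids(session, inserted)
    lost = set(inserted) - winners  # another asset already owned these paths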
@@ -4,7 +4,7 @@ from typing import Iterable
 
 import sqlalchemy as sa
 
-from app.assets.database.models import AssetInfo
+from app.assets.database.models import AssetReference
 
 MAX_BIND_PARAMS = 800
 
@@ -30,8 +30,11 @@ def iter_row_chunks(rows: list[dict], cols_per_row: int) -> Iterable[list[dict]]
 
 
 def build_visible_owner_clause(owner_id: str) -> sa.sql.ClauseElement:
-    """Build owner visibility predicate for reads. Owner-less rows are visible to everyone."""
+    """Build owner visibility predicate for reads.
+
+    Owner-less rows are visible to everyone.
+    """
     owner_id = (owner_id or "").strip()
     if owner_id == "":
-        return AssetInfo.owner_id == ""
-    return AssetInfo.owner_id.in_(["", owner_id])
+        return AssetReference.owner_id == ""
+    return AssetReference.owner_id.in_(["", owner_id])
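One behavioural note the shortened docstring no longer spells out: an empty owner_id restricts reads to owner-less rows, while a concrete owner also sees their own. A sketch of both cases (query shape assumed):

    # owner_id=""      -> WHERE owner_id = ''
    # owner_id="alice" -> WHERE owner_id IN ('', 'alice')
    stmt = sa.select(AssetReference).where(build_visible_owner_clause("alice"))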
@@ -6,7 +6,12 @@ from sqlalchemy.dialects import sqlite
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import Session
 
-from app.assets.database.models import AssetInfo, AssetInfoMeta, AssetInfoTag, Tag
+from app.assets.database.models import (
+    AssetReference,
+    AssetReferenceMeta,
+    AssetReferenceTag,
+    Tag,
+)
 from app.assets.database.queries.common import (
     build_visible_owner_clause,
     iter_row_chunks,
@@ -47,22 +52,22 @@ def ensure_tags_exist(
     session.execute(ins)
 
 
-def get_asset_tags(session: Session, asset_info_id: str) -> list[str]:
+def get_reference_tags(session: Session, reference_id: str) -> list[str]:
     return [
         tag_name
         for (tag_name,) in (
             session.execute(
-                select(AssetInfoTag.tag_name).where(
-                    AssetInfoTag.asset_info_id == asset_info_id
+                select(AssetReferenceTag.tag_name).where(
+                    AssetReferenceTag.asset_reference_id == reference_id
                 )
             )
         ).all()
     ]
 
 
-def set_asset_info_tags(
+def set_reference_tags(
     session: Session,
-    asset_info_id: str,
+    reference_id: str,
     tags: Sequence[str],
     origin: str = "manual",
 ) -> SetTagsDict:
@@ -72,8 +77,8 @@ def set_asset_info_tags(
         tag_name
         for (tag_name,) in (
             session.execute(
-                select(AssetInfoTag.tag_name).where(
-                    AssetInfoTag.asset_info_id == asset_info_id
+                select(AssetReferenceTag.tag_name).where(
+                    AssetReferenceTag.asset_reference_id == reference_id
                 )
             )
         ).all()
@@ -86,8 +91,8 @@ def set_asset_info_tags(
         ensure_tags_exist(session, to_add, tag_type="user")
         session.add_all(
             [
-                AssetInfoTag(
-                    asset_info_id=asset_info_id,
+                AssetReferenceTag(
+                    asset_reference_id=reference_id,
                     tag_name=t,
                     origin=origin,
                     added_at=get_utc_now(),
@@ -99,9 +104,9 @@ def set_asset_info_tags(
 
     if to_remove:
         session.execute(
-            delete(AssetInfoTag).where(
-                AssetInfoTag.asset_info_id == asset_info_id,
-                AssetInfoTag.tag_name.in_(to_remove),
+            delete(AssetReferenceTag).where(
+                AssetReferenceTag.asset_reference_id == reference_id,
+                AssetReferenceTag.tag_name.in_(to_remove),
             )
         )
     session.flush()
@@ -109,22 +114,22 @@
     return {"added": to_add, "removed": to_remove, "total": desired}
 
 
-def add_tags_to_asset_info(
+def add_tags_to_reference(
     session: Session,
-    asset_info_id: str,
+    reference_id: str,
     tags: Sequence[str],
     origin: str = "manual",
     create_if_missing: bool = True,
-    asset_info_row: AssetInfo | None = None,
+    reference_row: AssetReference | None = None,
 ) -> AddTagsDict:
-    if not asset_info_row:
-        info = session.get(AssetInfo, asset_info_id)
-        if not info:
-            raise ValueError(f"AssetInfo {asset_info_id} not found")
+    if not reference_row:
+        ref = session.get(AssetReference, reference_id)
+        if not ref:
+            raise ValueError(f"AssetReference {reference_id} not found")
 
     norm = normalize_tags(tags)
     if not norm:
-        total = get_asset_tags(session, asset_info_id=asset_info_id)
+        total = get_reference_tags(session, reference_id=reference_id)
         return {"added": [], "already_present": [], "total_tags": total}
 
     if create_if_missing:
@@ -134,8 +139,8 @@ def add_tags_to_asset_info(
         tag_name
         for (tag_name,) in (
             session.execute(
-                sa.select(AssetInfoTag.tag_name).where(
-                    AssetInfoTag.asset_info_id == asset_info_id
+                sa.select(AssetReferenceTag.tag_name).where(
+                    AssetReferenceTag.asset_reference_id == reference_id
                 )
            )
        ).all()
@@ -149,8 +154,8 @@ def add_tags_to_asset_info(
     try:
         session.add_all(
             [
-                AssetInfoTag(
-                    asset_info_id=asset_info_id,
+                AssetReferenceTag(
+                    asset_reference_id=reference_id,
                     tag_name=t,
                     origin=origin,
                     added_at=get_utc_now(),
@@ -162,7 +167,7 @@ def add_tags_to_asset_info(
     except IntegrityError:
         nested.rollback()
 
-    after = set(get_asset_tags(session, asset_info_id=asset_info_id))
+    after = set(get_reference_tags(session, reference_id=reference_id))
     return {
         "added": sorted(((after - current) & want)),
         "already_present": sorted(want & current),
@@ -170,26 +175,26 @@
     }
 
 
-def remove_tags_from_asset_info(
+def remove_tags_from_reference(
     session: Session,
-    asset_info_id: str,
+    reference_id: str,
     tags: Sequence[str],
 ) -> RemoveTagsDict:
-    info = session.get(AssetInfo, asset_info_id)
-    if not info:
-        raise ValueError(f"AssetInfo {asset_info_id} not found")
+    ref = session.get(AssetReference, reference_id)
+    if not ref:
+        raise ValueError(f"AssetReference {reference_id} not found")
 
     norm = normalize_tags(tags)
     if not norm:
-        total = get_asset_tags(session, asset_info_id=asset_info_id)
+        total = get_reference_tags(session, reference_id=reference_id)
         return {"removed": [], "not_present": [], "total_tags": total}
 
     existing = {
         tag_name
         for (tag_name,) in (
             session.execute(
-                sa.select(AssetInfoTag.tag_name).where(
-                    AssetInfoTag.asset_info_id == asset_info_id
+                sa.select(AssetReferenceTag.tag_name).where(
+                    AssetReferenceTag.asset_reference_id == reference_id
                 )
            )
        ).all()
@@ -200,14 +205,14 @@ def remove_tags_from_asset_info(
 
     if to_remove:
         session.execute(
-            delete(AssetInfoTag).where(
-                AssetInfoTag.asset_info_id == asset_info_id,
-                AssetInfoTag.tag_name.in_(to_remove),
+            delete(AssetReferenceTag).where(
+                AssetReferenceTag.asset_reference_id == reference_id,
+                AssetReferenceTag.tag_name.in_(to_remove),
             )
         )
         session.flush()
 
-    total = get_asset_tags(session, asset_info_id=asset_info_id)
+    total = get_reference_tags(session, reference_id=reference_id)
     return {"removed": to_remove, "not_present": not_present, "total_tags": total}
 
 
@@ -218,29 +223,32 @@ def add_missing_tag_for_asset_id(
 ) -> None:
     select_rows = (
         sa.select(
-            AssetInfo.id.label("asset_info_id"),
+            AssetReference.id.label("asset_reference_id"),
             sa.literal("missing").label("tag_name"),
             sa.literal(origin).label("origin"),
             sa.literal(get_utc_now()).label("added_at"),
         )
-        .where(AssetInfo.asset_id == asset_id)
+        .where(AssetReference.asset_id == asset_id)
         .where(
             sa.not_(
                 sa.exists().where(
-                    (AssetInfoTag.asset_info_id == AssetInfo.id)
-                    & (AssetInfoTag.tag_name == "missing")
+                    (AssetReferenceTag.asset_reference_id == AssetReference.id)
+                    & (AssetReferenceTag.tag_name == "missing")
                 )
             )
         )
     )
     session.execute(
-        sqlite.insert(AssetInfoTag)
+        sqlite.insert(AssetReferenceTag)
         .from_select(
-            ["asset_info_id", "tag_name", "origin", "added_at"],
+            ["asset_reference_id", "tag_name", "origin", "added_at"],
             select_rows,
         )
         .on_conflict_do_nothing(
-            index_elements=[AssetInfoTag.asset_info_id, AssetInfoTag.tag_name]
+            index_elements=[
+                AssetReferenceTag.asset_reference_id,
+                AssetReferenceTag.tag_name,
+            ]
         )
     )
@@ -250,11 +258,11 @@ def remove_missing_tag_for_asset_id(
     asset_id: str,
 ) -> None:
     session.execute(
-        sa.delete(AssetInfoTag).where(
-            AssetInfoTag.asset_info_id.in_(
-                sa.select(AssetInfo.id).where(AssetInfo.asset_id == asset_id)
+        sa.delete(AssetReferenceTag).where(
+            AssetReferenceTag.asset_reference_id.in_(
+                sa.select(AssetReference.id).where(AssetReference.asset_id == asset_id)
             ),
-            AssetInfoTag.tag_name == "missing",
+            AssetReferenceTag.tag_name == "missing",
         )
     )
@@ -270,13 +278,13 @@ def list_tags_with_usage(
 ) -> tuple[list[tuple[str, str, int]], int]:
     counts_sq = (
         select(
-            AssetInfoTag.tag_name.label("tag_name"),
-            func.count(AssetInfoTag.asset_info_id).label("cnt"),
+            AssetReferenceTag.tag_name.label("tag_name"),
+            func.count(AssetReferenceTag.asset_reference_id).label("cnt"),
         )
-        .select_from(AssetInfoTag)
-        .join(AssetInfo, AssetInfo.id == AssetInfoTag.asset_info_id)
+        .select_from(AssetReferenceTag)
+        .join(AssetReference, AssetReference.id == AssetReferenceTag.asset_reference_id)
         .where(build_visible_owner_clause(owner_id))
-        .group_by(AssetInfoTag.tag_name)
+        .group_by(AssetReferenceTag.tag_name)
         .subquery()
     )
@@ -308,7 +316,9 @@ def list_tags_with_usage(
         total_q = total_q.where(Tag.name.like(escaped + "%", escape=esc))
     if not include_zero:
         total_q = total_q.where(
-            Tag.name.in_(select(AssetInfoTag.tag_name).group_by(AssetInfoTag.tag_name))
+            Tag.name.in_(
+                select(AssetReferenceTag.tag_name).group_by(AssetReferenceTag.tag_name)
+            )
         )
 
     rows = (session.execute(q.limit(limit).offset(offset))).all()
@@ -323,26 +333,31 @@ def bulk_insert_tags_and_meta(
     tag_rows: list[dict],
     meta_rows: list[dict],
 ) -> None:
-    """Batch insert into asset_info_tags and asset_info_meta with ON CONFLICT DO NOTHING.
+    """Batch insert into asset_reference_tags and asset_reference_meta.
+
+    Uses ON CONFLICT DO NOTHING.
 
     Args:
         session: Database session
-        tag_rows: List of dicts with keys: asset_info_id, tag_name, origin, added_at
-        meta_rows: List of dicts with keys: asset_info_id, key, ordinal, val_str, val_num, val_bool, val_json
+        tag_rows: Dicts with: asset_reference_id, tag_name, origin, added_at
+        meta_rows: Dicts with: asset_reference_id, key, ordinal, val_*
     """
     if tag_rows:
-        ins_tags = sqlite.insert(AssetInfoTag).on_conflict_do_nothing(
-            index_elements=[AssetInfoTag.asset_info_id, AssetInfoTag.tag_name]
+        ins_tags = sqlite.insert(AssetReferenceTag).on_conflict_do_nothing(
+            index_elements=[
+                AssetReferenceTag.asset_reference_id,
+                AssetReferenceTag.tag_name,
+            ]
         )
         for chunk in iter_row_chunks(tag_rows, cols_per_row=4):
             session.execute(ins_tags, chunk)
 
     if meta_rows:
-        ins_meta = sqlite.insert(AssetInfoMeta).on_conflict_do_nothing(
+        ins_meta = sqlite.insert(AssetReferenceMeta).on_conflict_do_nothing(
             index_elements=[
-                AssetInfoMeta.asset_info_id,
-                AssetInfoMeta.key,
-                AssetInfoMeta.ordinal,
+                AssetReferenceMeta.asset_reference_id,
+                AssetReferenceMeta.key,
+                AssetReferenceMeta.ordinal,
             ]
         )
         for chunk in iter_row_chunks(meta_rows, cols_per_row=7):
@@ -31,8 +31,9 @@ ALLOWED_ROOTS: tuple[Literal["models", "input", "output"], ...] = (
 
 
 def escape_sql_like_string(s: str, escape: str = "!") -> tuple[str, str]:
-    """Escapes %, _ and the escape char itself in a LIKE prefix.
-    Returns (escaped_prefix, escape_char). Caller should append '%' and pass escape=escape_char to .like().
+    """Escapes %, _ and the escape char in a LIKE prefix.
+
+    Returns (escaped_prefix, escape_char).
     """
     s = s.replace(escape, escape + escape)  # escape the escape char first
     s = s.replace("%", escape + "%").replace("_", escape + "_")  # escape LIKE wildcards
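A quick worked check of the escaping contract (values computed by hand from the two replaces above):

    escaped, esc = escape_sql_like_string("50%_off!")
    # escaped == "50!%!_off!!", esc == "!"
    # Prefix match, as the docstring instructs:
    #   column.like(escaped + "%", escape=esc)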
@@ -10,13 +10,16 @@ from app.assets.database.queries import (
     bulk_update_enrichment_level,
     bulk_update_is_missing,
     bulk_update_needs_verify,
-    delete_cache_states_by_ids,
     delete_orphaned_seed_asset,
+    delete_references_by_ids,
     ensure_tags_exist,
-    get_cache_states_for_prefixes,
-    get_unenriched_cache_states,
+    get_asset_by_hash,
+    get_references_for_prefixes,
+    get_unenriched_references,
+    reassign_asset_references,
     remove_missing_tag_for_asset_id,
-    set_asset_info_metadata,
+    set_reference_metadata,
+    update_asset_hash_and_mime,
 )
 from app.assets.services.bulk_ingest import (
     SeedAssetSpec,
@@ -38,8 +41,8 @@ from app.assets.services.path_utils import (
 from app.database.db import create_session, dependencies_available
 
 
-class _StateInfo(TypedDict):
-    sid: int
+class _RefInfo(TypedDict):
+    ref_id: str
     fp: str
     exists: bool
     fast_ok: bool
@@ -49,7 +52,7 @@ class _StateInfo(TypedDict):
 class _AssetAccumulator(TypedDict):
     hash: str | None
     size_db: int
-    states: list[_StateInfo]
+    refs: list[_RefInfo]
 
 
 RootType = Literal["models", "input", "output"]
@@ -97,17 +100,17 @@ def collect_models_files() -> list[str]:
     return out
 
 
-def sync_cache_states_with_filesystem(
+def sync_references_with_filesystem(
     session,
     root: RootType,
     collect_existing_paths: bool = False,
     update_missing_tags: bool = False,
 ) -> set[str] | None:
-    """Reconcile cache states with filesystem for a root.
+    """Reconcile asset references with filesystem for a root.
 
-    - Toggle needs_verify per state using fast mtime/size check
-    - For hashed assets with at least one fast-ok state in this root: delete stale missing states
-    - For seed assets with all states missing: delete Asset and its AssetInfos
+    - Toggle needs_verify per reference using fast mtime/size check
+    - For hashed assets with at least one fast-ok ref: delete stale missing refs
+    - For seed assets with all refs missing: delete Asset and its references
     - Optionally add/remove 'missing' tags based on fast-ok in this root
     - Optionally return surviving absolute paths
 
@@ -124,7 +127,7 @@ def sync_cache_states_with_filesystem(
     if not prefixes:
         return set() if collect_existing_paths else None
 
-    rows = get_cache_states_for_prefixes(
+    rows = get_references_for_prefixes(
         session, prefixes, include_missing=update_missing_tags
     )
 
@@ -132,7 +135,7 @@ def sync_cache_states_with_filesystem(
     for row in rows:
         acc = by_asset.get(row.asset_id)
         if acc is None:
-            acc = {"hash": row.asset_hash, "size_db": row.size_bytes, "states": []}
+            acc = {"hash": row.asset_hash, "size_db": row.size_bytes, "refs": []}
             by_asset[row.asset_id] = acc
 
         fast_ok = False
@@ -152,9 +155,9 @@ def sync_cache_states_with_filesystem(
             exists = False
             logging.debug("OSError checking %s: %s", row.file_path, e)
 
-        acc["states"].append(
+        acc["refs"].append(
             {
-                "sid": row.state_id,
+                "ref_id": row.reference_id,
                 "fp": row.file_path,
                 "exists": exists,
                 "fast_ok": fast_ok,
@@ -162,61 +165,63 @@ def sync_cache_states_with_filesystem(
             }
         )
 
-    to_set_verify: list[int] = []
-    to_clear_verify: list[int] = []
-    stale_state_ids: list[int] = []
-    to_mark_missing: list[int] = []
-    to_clear_missing: list[int] = []
+    to_set_verify: list[str] = []
+    to_clear_verify: list[str] = []
+    stale_ref_ids: list[str] = []
+    to_mark_missing: list[str] = []
+    to_clear_missing: list[str] = []
     survivors: set[str] = set()
 
     for aid, acc in by_asset.items():
         a_hash = acc["hash"]
-        states = acc["states"]
-        any_fast_ok = any(s["fast_ok"] for s in states)
-        all_missing = all(not s["exists"] for s in states)
+        refs = acc["refs"]
+        any_fast_ok = any(r["fast_ok"] for r in refs)
+        all_missing = all(not r["exists"] for r in refs)
 
-        for s in states:
-            if not s["exists"]:
-                to_mark_missing.append(s["sid"])
+        for r in refs:
+            if not r["exists"]:
+                to_mark_missing.append(r["ref_id"])
                 continue
-            if s["fast_ok"]:
-                to_clear_missing.append(s["sid"])
-                if s["needs_verify"]:
-                    to_clear_verify.append(s["sid"])
-            if not s["fast_ok"] and not s["needs_verify"]:
-                to_set_verify.append(s["sid"])
+            if r["fast_ok"]:
+                to_clear_missing.append(r["ref_id"])
+                if r["needs_verify"]:
+                    to_clear_verify.append(r["ref_id"])
+            if not r["fast_ok"] and not r["needs_verify"]:
+                to_set_verify.append(r["ref_id"])
 
         if a_hash is None:
-            if states and all_missing:
+            if refs and all_missing:
                 delete_orphaned_seed_asset(session, aid)
             else:
-                for s in states:
-                    if s["exists"]:
-                        survivors.add(os.path.abspath(s["fp"]))
+                for r in refs:
+                    if r["exists"]:
+                        survivors.add(os.path.abspath(r["fp"]))
             continue
 
         if any_fast_ok:
-            for s in states:
-                if not s["exists"]:
-                    stale_state_ids.append(s["sid"])
+            for r in refs:
+                if not r["exists"]:
+                    stale_ref_ids.append(r["ref_id"])
             if update_missing_tags:
                 try:
                     remove_missing_tag_for_asset_id(session, asset_id=aid)
                 except Exception as e:
-                    logging.warning("Failed to remove missing tag for asset %s: %s", aid, e)
+                    logging.warning(
+                        "Failed to remove missing tag for asset %s: %s", aid, e
+                    )
         elif update_missing_tags:
             try:
                 add_missing_tag_for_asset_id(session, asset_id=aid, origin="automatic")
             except Exception as e:
                 logging.warning("Failed to add missing tag for asset %s: %s", aid, e)
 
-        for s in states:
-            if s["exists"]:
-                survivors.add(os.path.abspath(s["fp"]))
+        for r in refs:
+            if r["exists"]:
+                survivors.add(os.path.abspath(r["fp"]))
 
-    delete_cache_states_by_ids(session, stale_state_ids)
-    stale_set = set(stale_state_ids)
-    to_mark_missing = [sid for sid in to_mark_missing if sid not in stale_set]
+    delete_references_by_ids(session, stale_ref_ids)
+    stale_set = set(stale_ref_ids)
+    to_mark_missing = [ref_id for ref_id in to_mark_missing if ref_id not in stale_set]
     bulk_update_is_missing(session, to_mark_missing, value=True)
     bulk_update_is_missing(session, to_clear_missing, value=False)
     bulk_update_needs_verify(session, to_set_verify, value=True)
@@ -226,13 +231,13 @@ def sync_cache_states_with_filesystem(
 
 
 def sync_root_safely(root: RootType) -> set[str]:
-    """Sync a single root's cache states with the filesystem.
+    """Sync a single root's references with the filesystem.
 
     Returns survivors (existing paths) or empty set on failure.
     """
     try:
         with create_session() as sess:
-            survivors = sync_cache_states_with_filesystem(
+            survivors = sync_references_with_filesystem(
                 sess,
                 root,
                 collect_existing_paths=True,
@@ -246,7 +251,7 @@ def sync_root_safely(root: RootType) -> set[str]:
 
 
 def mark_missing_outside_prefixes_safely(prefixes: list[str]) -> int:
-    """Mark cache states as missing when outside the given prefixes.
+    """Mark references as missing when outside the given prefixes.
 
     This is a non-destructive soft-delete. Returns count marked or 0 on failure.
     """
@@ -283,8 +288,8 @@ def build_asset_specs(
     Args:
         paths: List of file paths to process
        existing_paths: Set of paths that already exist in the database
-        enable_metadata_extraction: If True, extract tier 1 & 2 metadata from files
-        compute_hashes: If True, compute blake3 hashes for each file (slow for large files)
+        enable_metadata_extraction: If True, extract tier 1 & 2 metadata
+        compute_hashes: If True, compute blake3 hashes (slow for large files)
     """
     specs: list[SeedAssetSpec] = []
     tag_pool: set[str] = set()
@@ -398,7 +403,7 @@ def build_stub_specs(
 
 
 def insert_asset_specs(specs: list[SeedAssetSpec], tag_pool: set[str]) -> int:
-    """Insert asset specs into database, returning count of created infos."""
+    """Insert asset specs into database, returning count of created refs."""
     if not specs:
         return 0
     with create_session() as sess:
@@ -406,7 +411,7 @@ def insert_asset_specs(specs: list[SeedAssetSpec], tag_pool: set[str]) -> int:
         ensure_tags_exist(sess, tag_pool, tag_type="user")
         result = batch_insert_seed_assets(sess, specs=specs, owner_id="")
         sess.commit()
-        return result.inserted_infos
+        return result.inserted_refs
 
 
 def seed_assets(
@@ -419,10 +424,10 @@ def seed_assets(
     Args:
         roots: Tuple of root types to scan (models, input, output)
         enable_logging: If True, log progress and completion messages
-        compute_hashes: If True, compute blake3 hashes for each file (slow for large files)
+        compute_hashes: If True, compute blake3 hashes (slow for large files)
 
-    Note: This function does not mark missing assets. Call mark_missing_outside_prefixes_safely
-    separately if cleanup is needed.
+    Note: This function does not mark missing assets.
+    Call mark_missing_outside_prefixes_safely separately if cleanup is needed.
     """
     if not dependencies_available():
         if enable_logging:
@@ -443,7 +448,8 @@ def seed_assets(
 
     if enable_logging:
         logging.info(
-            "Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, total_seen=%d)",
+            "Assets scan(roots=%s) completed in %.3fs "
+            "(created=%d, skipped_existing=%d, total_seen=%d)",
             roots,
             time.perf_counter() - t_start,
             created,
@@ -471,7 +477,7 @@ def get_unenriched_assets_for_roots(
         limit: Maximum number of rows to return
 
     Returns:
-        List of UnenrichedAssetRow
+        List of UnenrichedReferenceRow
     """
     prefixes: list[str] = []
     for root in roots:
@@ -481,13 +487,15 @@ def get_unenriched_assets_for_roots(
         return []
 
     with create_session() as sess:
-        return get_unenriched_cache_states(sess, prefixes, max_level=max_level, limit=limit)
+        return get_unenriched_references(
+            sess, prefixes, max_level=max_level, limit=limit
+        )
 
 
 def enrich_asset(
     file_path: str,
-    cache_state_id: int,
-    asset_info_id: str,
+    reference_id: str,
     asset_id: str,
     extract_metadata: bool = True,
     compute_hash: bool = False,
 ) -> int:
@@ -495,8 +503,8 @@ def enrich_asset(
 
     Args:
         file_path: Absolute path to the file
-        cache_state_id: ID of the cache state to update
-        asset_info_id: ID of the asset info to update
+        reference_id: ID of the reference to update
         asset_id: ID of the asset to update (for mime_type and hash)
         extract_metadata: If True, extract safetensors header and mime type
         compute_hash: If True, compute blake3 hash
 
@@ -511,30 +519,46 @@ def enrich_asset(
         return new_level
 
     rel_fname = compute_relative_filename(file_path)
+    mime_type: str | None = None
+
+    if extract_metadata:
+        metadata = extract_file_metadata(
+            file_path,
+            stat_result=stat_p,
+            enable_safetensors=True,
+            relative_filename=rel_fname,
+        )
+        if metadata:
+            mime_type = metadata.content_type
+            new_level = ENRICHMENT_METADATA
+
+    full_hash: str | None = None
+    if compute_hash:
+        try:
+            digest = compute_blake3_hash(file_path)
+            full_hash = f"blake3:{digest}"
+            new_level = ENRICHMENT_HASHED
+        except Exception as e:
+            logging.warning("Failed to hash %s: %s", file_path, e)
 
     with create_session() as sess:
-        if extract_metadata:
-            metadata = extract_file_metadata(
-                file_path,
-                stat_result=stat_p,
-                enable_safetensors=True,
-                relative_filename=rel_fname,
-            )
-            if metadata:
-                user_metadata = metadata.to_user_metadata()
-                set_asset_info_metadata(sess, asset_info_id, user_metadata)
-                new_level = ENRICHMENT_METADATA
+        if extract_metadata and metadata:
+            user_metadata = metadata.to_user_metadata()
+            set_reference_metadata(sess, reference_id, user_metadata)
 
-        if compute_hash:
-            try:
-                digest = compute_blake3_hash(file_path)
-                # TODO: Update asset.hash field
-                # For now just mark the enrichment level
-                new_level = ENRICHMENT_HASHED
-            except Exception as e:
-                logging.warning("Failed to hash %s: %s", file_path, e)
+        if full_hash:
+            existing = get_asset_by_hash(sess, full_hash)
+            if existing and existing.id != asset_id:
+                reassign_asset_references(sess, asset_id, existing.id, reference_id)
+                delete_orphaned_seed_asset(sess, asset_id)
+                if mime_type:
+                    update_asset_hash_and_mime(sess, existing.id, mime_type=mime_type)
+            else:
+                update_asset_hash_and_mime(sess, asset_id, full_hash, mime_type)
+        elif mime_type:
+            update_asset_hash_and_mime(sess, asset_id, mime_type=mime_type)
 
-        bulk_update_enrichment_level(sess, [cache_state_id], new_level)
+        bulk_update_enrichment_level(sess, [reference_id], new_level)
         sess.commit()
 
     return new_level
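The enrichment loop shown next drives this function once per reference; condensed, the intended call pattern is roughly the following sketch (loop body inferred from enrich_assets_batch, argument values assumed):

    rows = get_unenriched_assets_for_roots(("models",), max_level=0, limit=100)
    for row in rows:
        enrich_asset(
            file_path=row.file_path,
            reference_id=row.reference_id,
            asset_id=row.asset_id,
            extract_metadata=True,
            compute_hash=True,  # enables the hash-merge path above
        )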
@@ -548,7 +572,7 @@ def enrich_assets_batch(
     """Enrich a batch of assets.
 
     Args:
-        rows: List of UnenrichedAssetRow from get_unenriched_assets_for_roots
+        rows: List of UnenrichedReferenceRow from get_unenriched_assets_for_roots
         extract_metadata: If True, extract metadata for each asset
         compute_hash: If True, compute hash for each asset
 
@@ -562,8 +586,8 @@ def enrich_assets_batch(
         try:
             new_level = enrich_asset(
                 file_path=row.file_path,
-                cache_state_id=row.cache_state_id,
-                asset_info_id=row.asset_info_id,
+                reference_id=row.reference_id,
                 asset_id=row.asset_id,
                 extract_metadata=extract_metadata,
                 compute_hash=compute_hash,
             )
@@ -128,7 +128,7 @@ class AssetSeeder:
             phase: Scan phase to run (FAST, ENRICH, or FULL for both)
             progress_callback: Optional callback called with progress updates
             prune_first: If True, prune orphaned assets before scanning
-            compute_hashes: If True, compute blake3 hashes for each file (slow for large files)
+            compute_hashes: If True, compute blake3 hashes (slow)
 
         Returns:
             True if scan was started, False if already running
@@ -136,7 +136,7 @@ class AssetSeeder:
         if self._disabled:
             logging.debug("Asset seeder is disabled, skipping start")
             return False
-        logging.info("Asset seeder start requested (roots=%s, phase=%s)", roots, phase.value)
+        logging.info("Seeder start (roots=%s, phase=%s)", roots, phase.value)
         with self._lock:
             if self._state != State.IDLE:
                 logging.info("Asset seeder already running, skipping start")
@@ -295,12 +295,15 @@ class AssetSeeder:
         if not self.wait(timeout=timeout):
             return False
 
+        cb = progress_callback if progress_callback is not None else prev_callback
         return self.start(
             roots=roots if roots is not None else prev_roots,
             phase=phase if phase is not None else prev_phase,
-            progress_callback=progress_callback if progress_callback is not None else prev_callback,
+            progress_callback=cb,
             prune_first=prune_first if prune_first is not None else prev_prune,
-            compute_hashes=compute_hashes if compute_hashes is not None else prev_hashes,
+            compute_hashes=(
+                compute_hashes if compute_hashes is not None else prev_hashes
+            ),
         )
 
     def wait(self, timeout: float | None = None) -> bool:
@@ -497,7 +500,7 @@ class AssetSeeder:
             all_prefixes = get_all_known_prefixes()
             marked = mark_missing_outside_prefixes_safely(all_prefixes)
             if marked > 0:
-                logging.info("Marked %d cache states as missing before scan", marked)
+                logging.info("Marked %d refs as missing before scan", marked)
 
             if self._check_pause_and_cancel():
                 logging.info("Asset scan cancelled after pruning phase")
@@ -508,7 +511,8 @@ class AssetSeeder:
 
         # Phase 1: Fast scan (stub records)
         if phase in (ScanPhase.FAST, ScanPhase.FULL):
-            total_created, skipped_existing, total_paths = self._run_fast_phase(roots)
+            created, skipped, paths = self._run_fast_phase(roots)
+            total_created, skipped_existing, total_paths = created, skipped, paths
 
             if self._check_pause_and_cancel():
                 cancelled = True
@@ -542,12 +546,8 @@ class AssetSeeder:
 
         elapsed = time.perf_counter() - t_start
         logging.info(
-            "Asset scan(roots=%s, phase=%s) completed in %.3fs (created=%d, enriched=%d, skipped=%d)",
-            roots,
-            phase.value,
-            elapsed,
-            total_created,
-            total_enriched,
+            "Scan(%s, %s) done %.3fs: created=%d enriched=%d skipped=%d",
+            roots, phase.value, elapsed, total_created, total_enriched,
             skipped_existing,
         )
 
@@ -668,7 +668,10 @@ class AssetSeeder:
         progress_interval = 1.0
 
         # Get the target enrichment level based on compute_hashes
-        target_max_level = ENRICHMENT_STUB if not self._compute_hashes else ENRICHMENT_METADATA
+        if not self._compute_hashes:
+            target_max_level = ENRICHMENT_STUB
+        else:
+            target_max_level = ENRICHMENT_METADATA
 
         self._emit_event(
             "assets.seed.started",
@@ -30,11 +30,11 @@ from app.assets.services.schemas import (
|
||||
AddTagsResult,
|
||||
AssetData,
|
||||
AssetDetailResult,
|
||||
AssetInfoData,
|
||||
AssetSummaryData,
|
||||
DownloadResolutionResult,
|
||||
IngestResult,
|
||||
ListAssetsResult,
|
||||
ReferenceData,
|
||||
RegisterAssetResult,
|
||||
RemoveTagsResult,
|
||||
SetTagsResult,
|
||||
@@ -52,8 +52,8 @@ __all__ = [
|
||||
"AddTagsResult",
|
||||
"AssetData",
|
||||
"AssetDetailResult",
|
||||
"AssetInfoData",
|
||||
"AssetSummaryData",
|
||||
"ReferenceData",
|
||||
"BulkInsertResult",
|
||||
"DependencyMissingError",
|
||||
"DownloadResolutionResult",
|
||||
|
||||
@@ -7,23 +7,23 @@ from typing import Sequence
from app.assets.database.models import Asset
from app.assets.database.queries import (
    asset_exists_by_hash,
    asset_info_exists_for_asset_id,
    delete_asset_info_by_id,
    fetch_asset_info_and_asset,
    fetch_asset_info_asset_and_tags,
    reference_exists_for_asset_id,
    delete_reference_by_id,
    fetch_reference_and_asset,
    fetch_reference_asset_and_tags,
    get_asset_by_hash as queries_get_asset_by_hash,
    get_asset_info_by_id,
    list_asset_infos_page,
    list_cache_states_by_asset_id,
    set_asset_info_metadata,
    set_asset_info_preview,
    set_asset_info_tags,
    update_asset_info_access_time,
    update_asset_info_name,
    update_asset_info_updated_at,
    get_reference_by_id,
    list_references_page,
    list_references_by_asset_id,
    set_reference_metadata,
    set_reference_preview,
    set_reference_tags,
    update_reference_access_time,
    update_reference_name,
    update_reference_updated_at,
)
from app.assets.helpers import select_best_live_path
from app.assets.services.path_utils import compute_filename_for_asset
from app.assets.services.path_utils import compute_filename_for_reference
from app.assets.services.schemas import (
    AssetData,
    AssetDetailResult,
@@ -32,34 +32,34 @@ from app.assets.services.schemas import (
    ListAssetsResult,
    UserMetadata,
    extract_asset_data,
    extract_info_data,
    extract_reference_data,
)
from app.database.db import create_session


def get_asset_detail(
    asset_info_id: str,
    reference_id: str,
    owner_id: str = "",
) -> AssetDetailResult | None:
    with create_session() as session:
        result = fetch_asset_info_asset_and_tags(
        result = fetch_reference_asset_and_tags(
            session,
            asset_info_id=asset_info_id,
            reference_id=reference_id,
            owner_id=owner_id,
        )
        if not result:
            return None

        info, asset, tags = result
        ref, asset, tags = result
        return AssetDetailResult(
            info=extract_info_data(info),
            ref=extract_reference_data(ref),
            asset=extract_asset_data(asset),
            tags=tags,
        )


def update_asset_metadata(
    asset_info_id: str,
    reference_id: str,
    name: str | None = None,
    tags: Sequence[str] | None = None,
    user_metadata: UserMetadata = None,
@@ -67,58 +67,58 @@ def update_asset_metadata(
    owner_id: str = "",
) -> AssetDetailResult:
    with create_session() as session:
        info = get_asset_info_by_id(session, asset_info_id=asset_info_id)
        if not info:
            raise ValueError(f"AssetInfo {asset_info_id} not found")
        if info.owner_id and info.owner_id != owner_id:
        ref = get_reference_by_id(session, reference_id=reference_id)
        if not ref:
            raise ValueError(f"AssetReference {reference_id} not found")
        if ref.owner_id and ref.owner_id != owner_id:
            raise PermissionError("not owner")

        touched = False
        if name is not None and name != info.name:
            update_asset_info_name(session, asset_info_id=asset_info_id, name=name)
        if name is not None and name != ref.name:
            update_reference_name(session, reference_id=reference_id, name=name)
            touched = True

        computed_filename = compute_filename_for_asset(session, info.asset_id)
        computed_filename = compute_filename_for_reference(session, ref)

        new_meta: dict | None = None
        if user_metadata is not None:
            new_meta = dict(user_metadata)
        elif computed_filename:
            current_meta = info.user_metadata or {}
            current_meta = ref.user_metadata or {}
            if current_meta.get("filename") != computed_filename:
                new_meta = dict(current_meta)

        if new_meta is not None:
            if computed_filename:
                new_meta["filename"] = computed_filename
            set_asset_info_metadata(
                session, asset_info_id=asset_info_id, user_metadata=new_meta
            set_reference_metadata(
                session, reference_id=reference_id, user_metadata=new_meta
            )
            touched = True

        if tags is not None:
            set_asset_info_tags(
            set_reference_tags(
                session,
                asset_info_id=asset_info_id,
                reference_id=reference_id,
                tags=tags,
                origin=tag_origin,
            )
            touched = True

        if touched and user_metadata is None:
            update_asset_info_updated_at(session, asset_info_id=asset_info_id)
            update_reference_updated_at(session, reference_id=reference_id)

        result = fetch_asset_info_asset_and_tags(
        result = fetch_reference_asset_and_tags(
            session,
            asset_info_id=asset_info_id,
            reference_id=reference_id,
            owner_id=owner_id,
        )
        if not result:
            raise RuntimeError("State changed during update")

        info, asset, tag_list = result
        ref, asset, tag_list = result
        detail = AssetDetailResult(
            info=extract_info_data(info),
            ref=extract_reference_data(ref),
            asset=extract_asset_data(asset),
            tags=tag_list,
        )
@@ -128,16 +128,17 @@ def update_asset_metadata(


def delete_asset_reference(
    asset_info_id: str,
    reference_id: str,
    owner_id: str,
    delete_content_if_orphan: bool = True,
) -> bool:
    with create_session() as session:
        info_row = get_asset_info_by_id(session, asset_info_id=asset_info_id)
        asset_id = info_row.asset_id if info_row else None
        ref_row = get_reference_by_id(session, reference_id=reference_id)
        asset_id = ref_row.asset_id if ref_row else None
        file_path = ref_row.file_path if ref_row else None

        deleted = delete_asset_info_by_id(
            session, asset_info_id=asset_info_id, owner_id=owner_id
        deleted = delete_reference_by_id(
            session, reference_id=reference_id, owner_id=owner_id
        )
        if not deleted:
            session.commit()
@@ -147,16 +148,19 @@ def delete_asset_reference(
            session.commit()
            return True

        still_exists = asset_info_exists_for_asset_id(session, asset_id=asset_id)
        still_exists = reference_exists_for_asset_id(session, asset_id=asset_id)
        if still_exists:
            session.commit()
            return True

        # Orphaned asset - delete it and its files
        states = list_cache_states_by_asset_id(session, asset_id=asset_id)
        refs = list_references_by_asset_id(session, asset_id=asset_id)
        file_paths = [
            s.file_path for s in (states or []) if getattr(s, "file_path", None)
            r.file_path for r in (refs or []) if getattr(r, "file_path", None)
        ]
        # Also include the just-deleted file path
        if file_path:
            file_paths.append(file_path)

        asset_row = session.get(Asset, asset_id)
        if asset_row is not None:
@@ -174,32 +178,32 @@ def delete_asset_reference(


def set_asset_preview(
    asset_info_id: str,
    reference_id: str,
    preview_asset_id: str | None = None,
    owner_id: str = "",
) -> AssetDetailResult:
    with create_session() as session:
        info_row = get_asset_info_by_id(session, asset_info_id=asset_info_id)
        if not info_row:
            raise ValueError(f"AssetInfo {asset_info_id} not found")
        if info_row.owner_id and info_row.owner_id != owner_id:
        ref_row = get_reference_by_id(session, reference_id=reference_id)
        if not ref_row:
            raise ValueError(f"AssetReference {reference_id} not found")
        if ref_row.owner_id and ref_row.owner_id != owner_id:
            raise PermissionError("not owner")

        set_asset_info_preview(
        set_reference_preview(
            session,
            asset_info_id=asset_info_id,
            reference_id=reference_id,
            preview_asset_id=preview_asset_id,
        )

        result = fetch_asset_info_asset_and_tags(
            session, asset_info_id=asset_info_id, owner_id=owner_id
        result = fetch_reference_asset_and_tags(
            session, reference_id=reference_id, owner_id=owner_id
        )
        if not result:
            raise RuntimeError("State changed during preview update")

        info, asset, tags = result
        ref, asset, tags = result
        detail = AssetDetailResult(
            info=extract_info_data(info),
            ref=extract_reference_data(ref),
            asset=extract_asset_data(asset),
            tags=tags,
        )
@@ -231,7 +235,7 @@ def list_assets_page(
    order: str = "desc",
) -> ListAssetsResult:
    with create_session() as session:
        infos, tag_map, total = list_asset_infos_page(
        refs, tag_map, total = list_references_page(
            session,
            owner_id=owner_id,
            include_tags=include_tags,
@@ -245,12 +249,12 @@ def list_assets_page(
        )

        items: list[AssetSummaryData] = []
        for info in infos:
        for ref in refs:
            items.append(
                AssetSummaryData(
                    info=extract_info_data(info),
                    asset=extract_asset_data(info.asset),
                    tags=tag_map.get(info.id, []),
                    ref=extract_reference_data(ref),
                    asset=extract_asset_data(ref.asset),
                    tags=tag_map.get(ref.id, []),
                )
            )

@@ -258,33 +262,40 @@ def list_assets_page(


def resolve_asset_for_download(
    asset_info_id: str,
    reference_id: str,
    owner_id: str = "",
) -> DownloadResolutionResult:
    with create_session() as session:
        pair = fetch_asset_info_and_asset(
            session, asset_info_id=asset_info_id, owner_id=owner_id
        pair = fetch_reference_and_asset(
            session, reference_id=reference_id, owner_id=owner_id
        )
        if not pair:
            raise ValueError(f"AssetInfo {asset_info_id} not found")
            raise ValueError(f"AssetReference {reference_id} not found")

        info, asset = pair
        states = list_cache_states_by_asset_id(session, asset_id=asset.id)
        abs_path = select_best_live_path(states)
        if not abs_path:
            raise FileNotFoundError(
                f"No live path for AssetInfo {asset_info_id} (asset id={asset.id}, name={info.name})"
            )
        ref, asset = pair

        update_asset_info_access_time(session, asset_info_id=asset_info_id)
        # For references with file_path, use that directly
        if ref.file_path and os.path.isfile(ref.file_path):
            abs_path = ref.file_path
        else:
            # For API-created refs without file_path, find a path from other refs
            refs = list_references_by_asset_id(session, asset_id=asset.id)
            abs_path = select_best_live_path(refs)
            if not abs_path:
                raise FileNotFoundError(
                    f"No live path for AssetReference {reference_id} "
                    f"(asset id={asset.id}, name={ref.name})"
                )

        update_reference_access_time(session, reference_id=reference_id)
        session.commit()

        ctype = (
            asset.mime_type
            or mimetypes.guess_type(info.name or abs_path)[0]
            or mimetypes.guess_type(ref.name or abs_path)[0]
            or "application/octet-stream"
        )
        download_name = info.name or os.path.basename(abs_path)
        download_name = ref.name or os.path.basename(abs_path)
        return DownloadResolutionResult(
            abs_path=abs_path,
            content_type=ctype,

@@ -1,6 +1,5 @@
from __future__ import annotations

import logging
import os
import uuid
from dataclasses import dataclass
@@ -10,17 +9,16 @@ from typing import TYPE_CHECKING, Any, TypedDict
from sqlalchemy.orm import Session

from app.assets.database.queries import (
    bulk_insert_asset_infos_ignore_conflicts,
    bulk_insert_assets,
    bulk_insert_cache_states_ignore_conflicts,
    bulk_insert_references_ignore_conflicts,
    bulk_insert_tags_and_meta,
    delete_assets_by_ids,
    get_asset_info_ids_by_ids,
    get_cache_states_by_paths_and_asset_ids,
    get_existing_asset_ids,
    get_reference_ids_by_ids,
    get_references_by_paths_and_asset_ids,
    get_unreferenced_unhashed_asset_ids,
    mark_cache_states_missing_outside_prefixes,
    restore_cache_states_by_paths,
    mark_references_missing_outside_prefixes,
    restore_references_by_paths,
)
from app.assets.helpers import get_utc_now

@@ -52,21 +50,15 @@ class AssetRow(TypedDict):
    created_at: datetime


class CacheStateRow(TypedDict):
    """Row data for inserting a CacheState."""
class ReferenceRow(TypedDict):
    """Row data for inserting an AssetReference."""

    id: str
    asset_id: str
    file_path: str
    mtime_ns: int


class AssetInfoRow(TypedDict):
    """Row data for inserting an AssetInfo."""

    id: str
    owner_id: str
    name: str
    asset_id: str
    preview_id: str | None
    user_metadata: dict[str, Any] | None
    created_at: datetime
@@ -74,27 +66,10 @@ class AssetInfoRow(TypedDict):
    last_access_time: datetime


class AssetInfoRowInternal(TypedDict):
    """Internal row data for AssetInfo with extra tracking fields."""

    id: str
    owner_id: str
    name: str
    asset_id: str
    preview_id: str | None
    user_metadata: dict[str, Any] | None
    created_at: datetime
    updated_at: datetime
    last_access_time: datetime
    _tags: list[str]
    _filename: str
    _extracted_metadata: ExtractedMetadata | None


class TagRow(TypedDict):
    """Row data for inserting a Tag."""

    asset_info_id: str
    asset_reference_id: str
    tag_name: str
    origin: str
    added_at: datetime
@@ -103,7 +78,7 @@
class MetadataRow(TypedDict):
    """Row data for inserting asset metadata."""

    asset_info_id: str
    asset_reference_id: str
    key: str
    ordinal: int
    val_str: str | None
@@ -116,9 +91,9 @@ class MetadataRow(TypedDict):
class BulkInsertResult:
    """Result of bulk asset insertion."""

    inserted_infos: int
    won_states: int
    lost_states: int
    inserted_refs: int
    won_paths: int
    lost_paths: int


def batch_insert_seed_assets(
@@ -138,29 +113,28 @@ def batch_insert_seed_assets(

    This function orchestrates:
    1. Insert seed Assets (hash=NULL)
    2. Claim cache states with ON CONFLICT DO NOTHING
    2. Claim references with ON CONFLICT DO NOTHING on file_path
    3. Query to find winners (paths where our asset_id was inserted)
    4. Delete Assets for losers (path already claimed by another asset)
    5. Insert AssetInfo for winners
    6. Insert tags and metadata for successfully inserted AssetInfos
    5. Insert tags and metadata for successfully inserted references

    Returns:
        BulkInsertResult with inserted_infos, won_states, lost_states
        BulkInsertResult with inserted_refs, won_paths, lost_paths
    """
    if not specs:
        return BulkInsertResult(inserted_infos=0, won_states=0, lost_states=0)
        return BulkInsertResult(inserted_refs=0, won_paths=0, lost_paths=0)

    current_time = get_utc_now()
    asset_rows: list[AssetRow] = []
    cache_state_rows: list[CacheStateRow] = []
    reference_rows: list[ReferenceRow] = []
    path_to_asset_id: dict[str, str] = {}
    asset_id_to_info: dict[str, AssetInfoRowInternal] = {}
    asset_id_to_ref_data: dict[str, dict] = {}
    absolute_path_list: list[str] = []

    for spec in specs:
        absolute_path = os.path.abspath(spec["abs_path"])
        asset_id = str(uuid.uuid4())
        asset_info_id = str(uuid.uuid4())
        reference_id = str(uuid.uuid4())
        absolute_path_list.append(absolute_path)
        path_to_asset_id[absolute_path] = asset_id

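The claim step described in the docstring leans on the new UNIQUE(file_path) constraint: concurrent inserts for the same path are resolved by ON CONFLICT DO NOTHING, and the winner is whichever row landed first. A minimal sketch of that pattern, assuming SQLAlchemy 2.x with the SQLite dialect (the merged code uses the bulk_insert_references_ignore_conflicts and get_references_by_paths_and_asset_ids helpers instead):

from sqlalchemy import select
from sqlalchemy.dialects.sqlite import insert

from app.assets.database.models import AssetReference

def claim_paths(session, reference_rows: list[dict]) -> set[str]:
    # Rows whose file_path is already claimed are dropped silently;
    # UNIQUE(file_path) guarantees at most one reference per file.
    stmt = insert(AssetReference).on_conflict_do_nothing(
        index_elements=["file_path"]
    )
    session.execute(stmt, reference_rows)
    # Re-select to learn which asset_id actually owns each path now.
    wanted = {r["file_path"]: r["asset_id"] for r in reference_rows}
    rows = session.execute(
        select(AssetReference.file_path, AssetReference.asset_id)
        .where(AssetReference.file_path.in_(wanted))
    ).all()
    return {path for path, asset_id in rows if wanted[path] == asset_id}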
@@ -174,13 +148,7 @@ def batch_insert_seed_assets(
                "created_at": current_time,
            }
        )
        cache_state_rows.append(
            {
                "asset_id": asset_id,
                "file_path": absolute_path,
                "mtime_ns": spec["mtime_ns"],
            }
        )

        # Build user_metadata from extracted metadata or fallback to filename
        extracted_metadata = spec.get("metadata")
        if extracted_metadata:
@@ -190,35 +158,43 @@ def batch_insert_seed_assets(
        else:
            user_metadata = None

        asset_id_to_info[asset_id] = {
            "id": asset_info_id,
            "owner_id": owner_id,
            "name": spec["info_name"],
            "asset_id": asset_id,
            "preview_id": None,
            "user_metadata": user_metadata,
            "created_at": current_time,
            "updated_at": current_time,
            "last_access_time": current_time,
            "_tags": spec["tags"],
            "_filename": spec["fname"],
            "_extracted_metadata": extracted_metadata,
        reference_rows.append(
            {
                "id": reference_id,
                "asset_id": asset_id,
                "file_path": absolute_path,
                "mtime_ns": spec["mtime_ns"],
                "owner_id": owner_id,
                "name": spec["info_name"],
                "preview_id": None,
                "user_metadata": user_metadata,
                "created_at": current_time,
                "updated_at": current_time,
                "last_access_time": current_time,
            }
        )

        asset_id_to_ref_data[asset_id] = {
            "reference_id": reference_id,
            "tags": spec["tags"],
            "filename": spec["fname"],
            "extracted_metadata": extracted_metadata,
        }

    bulk_insert_assets(session, asset_rows)

    # Filter cache states to only those whose assets were actually inserted
    # Filter reference rows to only those whose assets were actually inserted
    # (assets with duplicate hashes are silently dropped by ON CONFLICT DO NOTHING)
    inserted_asset_ids = get_existing_asset_ids(
        session, [r["asset_id"] for r in cache_state_rows]
        session, [r["asset_id"] for r in reference_rows]
    )
    cache_state_rows = [
        r for r in cache_state_rows if r["asset_id"] in inserted_asset_ids
    reference_rows = [
        r for r in reference_rows if r["asset_id"] in inserted_asset_ids
    ]

    bulk_insert_cache_states_ignore_conflicts(session, cache_state_rows)
    restore_cache_states_by_paths(session, absolute_path_list)
    winning_paths = get_cache_states_by_paths_and_asset_ids(session, path_to_asset_id)
    bulk_insert_references_ignore_conflicts(session, reference_rows)
    restore_references_by_paths(session, absolute_path_list)
    winning_paths = get_references_by_paths_and_asset_ids(session, path_to_asset_id)

    all_paths_set = set(absolute_path_list)
    losing_paths = all_paths_set - winning_paths
@@ -229,44 +205,34 @@ def batch_insert_seed_assets(

    if not winning_paths:
        return BulkInsertResult(
            inserted_infos=0,
            won_states=0,
            lost_states=len(losing_paths),
            inserted_refs=0,
            won_paths=0,
            lost_paths=len(losing_paths),
        )

    winner_info_rows = [
        asset_id_to_info[path_to_asset_id[path]] for path in winning_paths
    # Get reference IDs for winners
    winning_ref_ids = [
        asset_id_to_ref_data[path_to_asset_id[path]]["reference_id"]
        for path in winning_paths
    ]
    database_info_rows: list[AssetInfoRow] = [
        {
            "id": info_row["id"],
            "owner_id": info_row["owner_id"],
            "name": info_row["name"],
            "asset_id": info_row["asset_id"],
            "preview_id": info_row["preview_id"],
            "user_metadata": info_row["user_metadata"],
            "created_at": info_row["created_at"],
            "updated_at": info_row["updated_at"],
            "last_access_time": info_row["last_access_time"],
        }
        for info_row in winner_info_rows
    ]
    bulk_insert_asset_infos_ignore_conflicts(session, database_info_rows)

    all_info_ids = [info_row["id"] for info_row in winner_info_rows]
    inserted_info_ids = get_asset_info_ids_by_ids(session, all_info_ids)
    inserted_ref_ids = get_reference_ids_by_ids(session, winning_ref_ids)

    tag_rows: list[TagRow] = []
    metadata_rows: list[MetadataRow] = []
    if inserted_info_ids:
        for info_row in winner_info_rows:
            info_id = info_row["id"]
            if info_id not in inserted_info_ids:

    if inserted_ref_ids:
        for path in winning_paths:
            asset_id = path_to_asset_id[path]
            ref_data = asset_id_to_ref_data[asset_id]
            ref_id = ref_data["reference_id"]

            if ref_id not in inserted_ref_ids:
                continue
            for tag in info_row["_tags"]:

            for tag in ref_data["tags"]:
                tag_rows.append(
                    {
                        "asset_info_id": info_id,
                        "asset_reference_id": ref_id,
                        "tag_name": tag,
                        "origin": "automatic",
                        "added_at": current_time,
@@ -274,17 +240,17 @@ def batch_insert_seed_assets(
                )

            # Use extracted metadata for meta rows if available
            extracted_metadata = info_row.get("_extracted_metadata")
            extracted_metadata = ref_data.get("extracted_metadata")
            if extracted_metadata:
                metadata_rows.extend(extracted_metadata.to_meta_rows(info_id))
            elif info_row["_filename"]:
                metadata_rows.extend(extracted_metadata.to_meta_rows(ref_id))
            elif ref_data["filename"]:
                # Fallback: just store filename
                metadata_rows.append(
                    {
                        "asset_info_id": info_id,
                        "asset_reference_id": ref_id,
                        "key": "filename",
                        "ordinal": 0,
                        "val_str": info_row["_filename"],
                        "val_str": ref_data["filename"],
                        "val_num": None,
                        "val_bool": None,
                        "val_json": None,
@@ -294,40 +260,36 @@ def batch_insert_seed_assets(
    bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=metadata_rows)

    return BulkInsertResult(
        inserted_infos=len(inserted_info_ids),
        won_states=len(winning_paths),
        lost_states=len(losing_paths),
        inserted_refs=len(inserted_ref_ids),
        won_paths=len(winning_paths),
        lost_paths=len(losing_paths),
    )

def mark_assets_missing_outside_prefixes(
    session: Session, valid_prefixes: list[str]
) -> int:
    """Mark cache states as missing when outside valid prefixes.
    """Mark references as missing when outside valid prefixes.

    This is a non-destructive operation that soft-deletes cache states
    This is a non-destructive operation that soft-deletes references
    by setting is_missing=True. User metadata is preserved and assets
    can be restored if the file reappears in a future scan.

    Note: This does NOT delete unreferenced unhashed assets. Those are
    preserved so user metadata remains intact even when base directories
    change.

    Args:
        session: Database session
        valid_prefixes: List of absolute directory prefixes that are valid

    Returns:
        Number of cache states marked as missing
        Number of references marked as missing
    """
    return mark_cache_states_missing_outside_prefixes(session, valid_prefixes)
    return mark_references_missing_outside_prefixes(session, valid_prefixes)

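mark_references_missing_outside_prefixes itself is not shown in this diff. A minimal sketch of the soft-delete it implies, assuming SQLAlchemy 2.x and the is_missing/file_path columns introduced by migration 0005 (the merged query may differ):

from sqlalchemy import or_, update
from sqlalchemy.orm import Session

from app.assets.database.models import AssetReference

def mark_references_missing_outside_prefixes(
    session: Session, valid_prefixes: list[str]
) -> int:
    # Soft-delete: flip is_missing instead of deleting rows, so tags and
    # user metadata survive and the row can be restored on a later scan.
    stmt = (
        update(AssetReference)
        .where(AssetReference.file_path.is_not(None))
        .where(AssetReference.is_missing.is_(False))
        .values(is_missing=True)
    )
    if valid_prefixes:
        # A reference survives if its file_path starts with any valid prefix.
        inside = or_(*[AssetReference.file_path.like(p + "%") for p in valid_prefixes])
        stmt = stmt.where(~inside)
    result = session.execute(stmt)
    return result.rowcount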
def cleanup_unreferenced_assets(session: Session) -> int:
    """Hard-delete unhashed assets with no active cache states.
    """Hard-delete unhashed assets with no active references.

    This is a destructive operation intended for explicit cleanup.
    Only deletes assets where hash=None and all cache states are missing.
    Only deletes assets where hash=None and all references are missing.

    Returns:
        Number of assets deleted

@@ -8,24 +8,23 @@ from sqlalchemy import select
from sqlalchemy.orm import Session

import app.assets.services.hashing as hashing
from app.assets.database.models import Asset, AssetInfo, Tag
from app.assets.database.models import Asset, AssetReference, Tag
from app.assets.database.queries import (
    add_tags_to_asset_info,
    fetch_asset_info_and_asset,
    add_tags_to_reference,
    fetch_reference_and_asset,
    get_asset_by_hash,
    get_asset_tags,
    get_or_create_asset_info,
    get_reference_tags,
    get_or_create_reference,
    remove_missing_tag_for_asset_id,
    set_asset_info_metadata,
    set_asset_info_tags,
    update_asset_info_timestamps,
    set_reference_metadata,
    set_reference_tags,
    upsert_asset,
    upsert_cache_state,
    upsert_reference,
)
from app.assets.helpers import normalize_tags
from app.assets.services.file_utils import get_size_and_mtime_ns
from app.assets.services.path_utils import (
    compute_filename_for_asset,
    compute_filename_for_reference,
    resolve_destination_from_tags,
    validate_path_within_base,
)
@@ -35,7 +34,7 @@ from app.assets.services.schemas import (
    UploadResult,
    UserMetadata,
    extract_asset_data,
    extract_info_data,
    extract_reference_data,
)
from app.database.db import create_session

@@ -58,9 +57,9 @@ def _ingest_file_from_path(

    asset_created = False
    asset_updated = False
    state_created = False
    state_updated = False
    asset_info_id: str | None = None
    ref_created = False
    ref_updated = False
    reference_id: str | None = None

    with create_session() as session:
        if preview_id:
@@ -74,49 +73,42 @@ def _ingest_file_from_path(
            mime_type=mime_type,
        )

        state_created, state_updated = upsert_cache_state(
        ref_created, ref_updated = upsert_reference(
            session,
            asset_id=asset.id,
            file_path=locator,
            name=info_name or os.path.basename(locator),
            mtime_ns=mtime_ns,
            owner_id=owner_id,
        )

        if info_name:
            info, info_created = get_or_create_asset_info(
                session,
                asset_id=asset.id,
                owner_id=owner_id,
                name=info_name,
                preview_id=preview_id,
            )
            if info_created:
                asset_info_id = info.id
            else:
                update_asset_info_timestamps(
                    session, asset_info=info, preview_id=preview_id
                )
                asset_info_id = info.id
        # Get the reference we just created/updated
        from app.assets.database.queries import get_reference_by_file_path
        ref = get_reference_by_file_path(session, locator)
        if ref:
            reference_id = ref.id

            if preview_id and ref.preview_id != preview_id:
                ref.preview_id = preview_id

        norm = normalize_tags(list(tags))
        if norm and asset_info_id:
        if norm:
            if require_existing_tags:
                _validate_tags_exist(session, norm)
            add_tags_to_asset_info(
            add_tags_to_reference(
                session,
                asset_info_id=asset_info_id,
                reference_id=reference_id,
                tags=norm,
                origin=tag_origin,
                create_if_missing=not require_existing_tags,
            )

        if asset_info_id:
            _update_metadata_with_filename(
                session,
                asset_info_id=asset_info_id,
                asset_id=asset.id,
                info=info,
                user_metadata=user_metadata,
            )
        _update_metadata_with_filename(
            session,
            reference_id=reference_id,
            ref=ref,
            user_metadata=user_metadata,
        )

        try:
            remove_missing_tag_for_asset_id(session, asset_id=asset.id)
@@ -128,9 +120,9 @@ def _ingest_file_from_path(
    return IngestResult(
        asset_created=asset_created,
        asset_updated=asset_updated,
        state_created=state_created,
        state_updated=state_updated,
        asset_info_id=asset_info_id,
        ref_created=ref_created,
        ref_updated=ref_updated,
        reference_id=reference_id,
    )


@@ -147,18 +139,17 @@ def _register_existing_asset(
    if not asset:
        raise ValueError(f"No asset with hash {asset_hash}")

    info, info_created = get_or_create_asset_info(
    ref, ref_created = get_or_create_reference(
        session,
        asset_id=asset.id,
        owner_id=owner_id,
        name=name,
        preview_id=None,
    )

    if not info_created:
        tag_names = get_asset_tags(session, asset_info_id=info.id)
    if not ref_created:
        tag_names = get_reference_tags(session, reference_id=ref.id)
        result = RegisterAssetResult(
            info=extract_info_data(info),
            ref=extract_reference_data(ref),
            asset=extract_asset_data(asset),
            tags=tag_names,
            created=False,
@@ -167,29 +158,29 @@ def _register_existing_asset(
        return result

    new_meta = dict(user_metadata or {})
    computed_filename = compute_filename_for_asset(session, asset.id)
    computed_filename = compute_filename_for_reference(session, ref)
    if computed_filename:
        new_meta["filename"] = computed_filename

    if new_meta:
        set_asset_info_metadata(
        set_reference_metadata(
            session,
            asset_info_id=info.id,
            reference_id=ref.id,
            user_metadata=new_meta,
        )

    if tags is not None:
        set_asset_info_tags(
        set_reference_tags(
            session,
            asset_info_id=info.id,
            reference_id=ref.id,
            tags=tags,
            origin=tag_origin,
        )

    tag_names = get_asset_tags(session, asset_info_id=info.id)
    session.refresh(info)
    tag_names = get_reference_tags(session, reference_id=ref.id)
    session.refresh(ref)
    result = RegisterAssetResult(
        info=extract_info_data(info),
        ref=extract_reference_data(ref),
        asset=extract_asset_data(asset),
        tags=tag_names,
        created=True,
@@ -211,14 +202,13 @@ def _validate_tags_exist(session: Session, tags: list[str]) -> None:

def _update_metadata_with_filename(
    session: Session,
    asset_info_id: str,
    asset_id: str,
    info: AssetInfo,
    reference_id: str,
    ref: AssetReference,
    user_metadata: UserMetadata,
) -> None:
    computed_filename = compute_filename_for_asset(session, asset_id)
    computed_filename = compute_filename_for_reference(session, ref)

    current_meta = info.user_metadata or {}
    current_meta = ref.user_metadata or {}
    new_meta = dict(current_meta)
    if user_metadata:
        for k, v in user_metadata.items():
@@ -227,9 +217,9 @@ def _update_metadata_with_filename(
            new_meta["filename"] = computed_filename

    if new_meta != current_meta:
        set_asset_info_metadata(
        set_reference_metadata(
            session,
            asset_info_id=asset_info_id,
            reference_id=reference_id,
            user_metadata=new_meta,
        )

@@ -287,7 +277,7 @@ def upload_from_temp_path(
        owner_id=owner_id,
    )
    return UploadResult(
        info=result.info,
        ref=result.ref,
        asset=result.asset,
        tags=result.tags,
        created_new=False,
@@ -334,21 +324,21 @@ def upload_from_temp_path(
        tag_origin="manual",
        require_existing_tags=False,
    )
    info_id = ingest_result.asset_info_id
    if not info_id:
        raise RuntimeError("failed to create asset metadata")
    reference_id = ingest_result.reference_id
    if not reference_id:
        raise RuntimeError("failed to create asset reference")

    with create_session() as session:
        pair = fetch_asset_info_and_asset(
            session, asset_info_id=info_id, owner_id=owner_id
        pair = fetch_reference_and_asset(
            session, reference_id=reference_id, owner_id=owner_id
        )
        if not pair:
            raise RuntimeError("inconsistent DB state after ingest")
        info, asset = pair
        tag_names = get_asset_tags(session, asset_info_id=info.id)
        ref, asset = pair
        tag_names = get_reference_tags(session, reference_id=ref.id)

        return UploadResult(
            info=extract_info_data(info),
            ref=extract_reference_data(ref),
            asset=extract_asset_data(asset),
            tags=tag_names,
            created_new=ingest_result.asset_created,
@@ -381,7 +371,7 @@ def create_from_hash(
    )

    return UploadResult(
        info=result.info,
        ref=result.ref,
        asset=result.asset,
        tags=result.tags,
        created_new=False,

@@ -52,6 +52,7 @@ class ExtractedMetadata:

    # Tier 1: Filesystem (always available)
    filename: str = ""
    file_path: str = ""  # Full absolute path to the file
    content_length: int = 0
    content_type: str | None = None
    format: str = ""  # file extension without dot
@@ -76,12 +77,14 @@ class ExtractedMetadata:
    resolve_url: str | None = None

    def to_user_metadata(self) -> dict[str, Any]:
        """Convert to user_metadata dict for AssetInfo.user_metadata JSON field."""
        """Convert to user_metadata dict for AssetReference.user_metadata JSON field."""
        data: dict[str, Any] = {
            "filename": self.filename,
            "content_length": self.content_length,
            "format": self.format,
        }
        if self.file_path:
            data["file_path"] = self.file_path
        if self.content_type:
            data["content_type"] = self.content_type

@@ -119,14 +122,14 @@ class ExtractedMetadata:

        return data

    def to_meta_rows(self, asset_info_id: str) -> list[dict]:
        """Convert to asset_info_meta rows for typed/indexed querying."""
    def to_meta_rows(self, reference_id: str) -> list[dict]:
        """Convert to asset_reference_meta rows for typed/indexed querying."""
        rows: list[dict] = []

        def add_str(key: str, val: str | None, ordinal: int = 0) -> None:
            if val:
                rows.append({
                    "asset_info_id": asset_info_id,
                    "asset_reference_id": reference_id,
                    "key": key,
                    "ordinal": ordinal,
                    "val_str": val[:2048] if len(val) > 2048 else val,
@@ -138,7 +141,7 @@ class ExtractedMetadata:
        def add_num(key: str, val: int | float | None) -> None:
            if val is not None:
                rows.append({
                    "asset_info_id": asset_info_id,
                    "asset_reference_id": reference_id,
                    "key": key,
                    "ordinal": 0,
                    "val_str": None,
@@ -150,7 +153,7 @@ class ExtractedMetadata:
        def add_bool(key: str, val: bool | None) -> None:
            if val is not None:
                rows.append({
                    "asset_info_id": asset_info_id,
                    "asset_reference_id": reference_id,
                    "key": key,
                    "ordinal": 0,
                    "val_str": None,
@@ -168,7 +171,8 @@ class ExtractedMetadata:
        # Tier 2
        add_str("base_model", self.base_model)
        add_str("air", self.air)
        add_bool("has_preview_images", self.has_preview_images if self.has_preview_images else None)
        has_previews = self.has_preview_images if self.has_preview_images else None
        add_bool("has_preview_images", has_previews)

        # trained_words as multiple rows with ordinals
        if self.trained_words:
@@ -191,7 +195,9 @@ class ExtractedMetadata:
        return rows


def _read_safetensors_header(path: str, max_size: int = MAX_SAFETENSORS_HEADER_SIZE) -> dict[str, Any] | None:
def _read_safetensors_header(
    path: str, max_size: int = MAX_SAFETENSORS_HEADER_SIZE
) -> dict[str, Any] | None:
    """Read only the JSON header from a safetensors file.

    This is very fast - reads 8 bytes for header length, then the JSON header.
@@ -220,7 +226,9 @@ def _read_safetensors_header(path: str, max_size: int = MAX_SAFETENSORS_HEADER_S
    return None

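For context: the safetensors container starts with a little-endian uint64 giving the JSON header length, followed by that many bytes of JSON. A minimal sketch of the read described in the docstring above; the body and the MAX_SAFETENSORS_HEADER_SIZE value are assumptions, only the signature comes from this diff:

import json
import struct
from typing import Any

MAX_SAFETENSORS_HEADER_SIZE = 16 * 1024 * 1024  # assumed cap; the real constant may differ

def _read_safetensors_header(
    path: str, max_size: int = MAX_SAFETENSORS_HEADER_SIZE
) -> dict[str, Any] | None:
    try:
        with open(path, "rb") as f:
            # First 8 bytes: little-endian uint64 with the JSON header length.
            (header_len,) = struct.unpack("<Q", f.read(8))
            if header_len > max_size:
                return None  # refuse oversized headers instead of allocating them
            # The next header_len bytes are a JSON object; model metadata,
            # when present, lives under its "__metadata__" key.
            return json.loads(f.read(header_len))
    except (OSError, ValueError, struct.error):
        return None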
def _extract_safetensors_metadata(header: dict[str, Any], meta: ExtractedMetadata) -> None:
def _extract_safetensors_metadata(
    header: dict[str, Any], meta: ExtractedMetadata
) -> None:
    """Extract metadata from safetensors header __metadata__ section.

    Modifies meta in-place.
@@ -230,7 +238,11 @@ def _extract_safetensors_metadat
        return

    # Common model metadata
    meta.base_model = st_meta.get("ss_base_model_version") or st_meta.get("modelspec.base_model") or st_meta.get("base_model")
    meta.base_model = (
        st_meta.get("ss_base_model_version")
        or st_meta.get("modelspec.base_model")
        or st_meta.get("base_model")
    )

    # Trained words / trigger words
    trained_words = st_meta.get("ss_tag_frequency")
@@ -304,8 +316,8 @@ def extract_file_metadata(
    meta = ExtractedMetadata()

    # Tier 1: Filesystem metadata
    # Use relative_filename if provided (for backward compatibility with existing behavior)
    meta.filename = relative_filename if relative_filename else os.path.basename(abs_path)
    meta.filename = relative_filename or os.path.basename(abs_path)
    meta.file_path = abs_path
    _, ext = os.path.splitext(abs_path)
    meta.format = ext.lstrip(".").lower() if ext else ""

@@ -333,6 +345,6 @@ def extract_file_metadata(
    try:
        _extract_safetensors_metadata(header, meta)
    except Exception as e:
        logging.debug("Failed to extract safetensors metadata from %s: %s", abs_path, e)
        logging.debug("Safetensors meta extract failed %s: %s", abs_path, e)

    return meta

@@ -7,18 +7,15 @@ from app.assets.helpers import normalize_tags


def get_comfy_models_folders() -> list[tuple[str, list[str]]]:
    """Build a list of (folder_name, base_paths[]) categories that are configured for model locations.
    """Build list of (folder_name, base_paths[]) for model locations.

    We trust `folder_paths.folder_names_and_paths` and include a category if
    *any* of its base paths lies under the Comfy `models_dir`.
    Includes a category if any of its base paths lies under models_dir.
    """
    targets: list[tuple[str, list[str]]] = []
    models_root = os.path.abspath(folder_paths.models_dir)
    for name, values in folder_paths.folder_names_and_paths.items():
        paths, _exts = (
            values[0],
            values[1],
        )  # NOTE: this prevents nodepacks that hackily edit folder_... from breaking ComfyUI
        # Unpack carefully to handle nodepacks that modify folder_paths
        paths, _exts = values[0], values[1]
        if any(os.path.abspath(p).startswith(models_root + os.sep) for p in paths):
            targets.append((name, paths))
    return targets
@@ -70,7 +67,6 @@ def compute_relative_filename(file_path: str) -> str | None:
    /.../models/text_encoders/clip_g.safetensors -> "clip_g.safetensors"

    For non-model paths, returns None.
    NOTE: this is a temporary helper, used only for initializing metadata["filename"] field.
    """
    try:
        root_category, rel_path = get_asset_category_and_relative_path(file_path)
@@ -92,18 +88,18 @@ def compute_relative_filename(file_path: str) -> str | None:
def get_asset_category_and_relative_path(
    file_path: str,
) -> tuple[Literal["input", "output", "models"], str]:
    """Given an absolute or relative file path, determine which root category the path belongs to:
    - 'input' if the file resides under `folder_paths.get_input_directory()`
    - 'output' if the file resides under `folder_paths.get_output_directory()`
    - 'models' if the file resides under any base path of categories returned by `get_comfy_models_folders()`
    """Determine which root category a file path belongs to.

    Categories:
    - 'input': under folder_paths.get_input_directory()
    - 'output': under folder_paths.get_output_directory()
    - 'models': under any base path from get_comfy_models_folders()

    Returns:
        (root_category, relative_path_inside_that_root)
        For 'models', the relative path is prefixed with the category name:
        e.g. ('models', 'vae/test/sub/ae.safetensors')

    Raises:
        ValueError: if the path does not belong to input, output, or configured model bases.
        ValueError: path does not belong to any known root.
    """
    fp_abs = os.path.abspath(file_path)

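The body of the category check is elided in this hunk beyond the os.path.abspath call. A minimal sketch of the prefix test it implies, using only names visible above (hypothetical beyond that):

import os

def _is_under(path: str, base: str) -> bool:
    # Normalize both sides and require a separator boundary so that
    # /models-extra is not mistaken for a child of /models.
    base_abs = os.path.abspath(base)
    return os.path.abspath(path).startswith(base_abs + os.sep)

# Resolution order mirrors the docstring above:
# if _is_under(fp_abs, folder_paths.get_input_directory()) -> "input"
# elif _is_under(fp_abs, folder_paths.get_output_directory()) -> "output"
# else: check each base path from get_comfy_models_folders() -> "models"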
@@ -149,32 +145,35 @@ def get_asset_category_and_relative_path(
    )


def compute_filename_for_reference(session, ref) -> str | None:
    """Compute the relative filename for an asset reference.

    Uses the file_path from the reference if available.
    """
    if ref.file_path:
        return compute_relative_filename(ref.file_path)
    return None


def compute_filename_for_asset(session, asset_id: str) -> str | None:
    """Compute the relative filename for an asset from its best live cache state path."""
    from app.assets.database.queries import list_cache_states_by_asset_id
    """Compute the relative filename for an asset from its best live reference path."""
    from app.assets.database.queries import list_references_by_asset_id
    from app.assets.helpers import select_best_live_path

    primary_path = select_best_live_path(
        list_cache_states_by_asset_id(session, asset_id=asset_id)
        list_references_by_asset_id(session, asset_id=asset_id)
    )
    return compute_relative_filename(primary_path) if primary_path else None


def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]:
    """Return a tuple (name, tags) derived from a filesystem path.
    """Return (name, tags) derived from a filesystem path.

    Semantics:
    - Root category is determined by `get_asset_category_and_relative_path`.
    - The returned `name` is the base filename with extension from the relative path.
    - The returned `tags` are:
        [root_category] + parent folders of the relative path (in order)
      For 'models', this means:
        file '/.../ModelsDir/vae/test_tag/ae.safetensors'
        -> root_category='models', some_path='vae/test_tag/ae.safetensors'
        -> name='ae.safetensors', tags=['models', 'vae', 'test_tag']
    - name: base filename with extension
    - tags: [root_category] + parent folder names in order

    Raises:
        ValueError: if the path does not belong to input, output, or configured model bases.
        ValueError: path does not belong to any known root.
    """
    root_category, some_path = get_asset_category_and_relative_path(file_path)
    p = Path(some_path)

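The rest of get_name_and_tags_from_asset_path is not shown here. A minimal sketch of the derivation the docstring describes, assuming pathlib semantics (not necessarily the code as merged):

from pathlib import Path

def name_and_tags(root_category: str, some_path: str) -> tuple[str, list[str]]:
    p = Path(some_path)
    # name is the final component; tags are the root category followed by
    # each parent directory of the relative path, in order.
    return p.name, [root_category, *p.parts[:-1]]

# name_and_tags("models", "vae/test_tag/ae.safetensors")
# -> ("ae.safetensors", ["models", "vae", "test_tag"])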
@@ -2,7 +2,7 @@ from dataclasses import dataclass
from datetime import datetime
from typing import Any, NamedTuple

from app.assets.database.models import Asset, AssetInfo
from app.assets.database.models import Asset, AssetReference

UserMetadata = dict[str, Any] | None

@@ -15,9 +15,12 @@ class AssetData:


@dataclass(frozen=True)
class AssetInfoData:
class ReferenceData:
    """Data transfer object for AssetReference."""

    id: str
    name: str
    file_path: str | None
    user_metadata: UserMetadata
    preview_id: str | None
    created_at: datetime
@@ -27,14 +30,14 @@ class AssetInfoData:

@dataclass(frozen=True)
class AssetDetailResult:
    info: AssetInfoData
    ref: ReferenceData
    asset: AssetData | None
    tags: list[str]


@dataclass(frozen=True)
class RegisterAssetResult:
    info: AssetInfoData
    ref: ReferenceData
    asset: AssetData
    tags: list[str]
    created: bool
@@ -44,9 +47,9 @@ class RegisterAssetResult:
class IngestResult:
    asset_created: bool
    asset_updated: bool
    state_created: bool
    state_updated: bool
    asset_info_id: str | None
    ref_created: bool
    ref_updated: bool
    reference_id: str | None


@dataclass(frozen=True)
@@ -78,7 +81,7 @@ class TagUsage(NamedTuple):

@dataclass(frozen=True)
class AssetSummaryData:
    info: AssetInfoData
    ref: ReferenceData
    asset: AssetData | None
    tags: list[str]

@@ -98,21 +101,22 @@ class DownloadResolutionResult:

@dataclass(frozen=True)
class UploadResult:
    info: AssetInfoData
    ref: ReferenceData
    asset: AssetData
    tags: list[str]
    created_new: bool


def extract_info_data(info: AssetInfo) -> AssetInfoData:
    return AssetInfoData(
        id=info.id,
        name=info.name,
        user_metadata=info.user_metadata,
        preview_id=info.preview_id,
        created_at=info.created_at,
        updated_at=info.updated_at,
        last_access_time=info.last_access_time,
def extract_reference_data(ref: AssetReference) -> ReferenceData:
    return ReferenceData(
        id=ref.id,
        name=ref.name,
        file_path=ref.file_path,
        user_metadata=ref.user_metadata,
        preview_id=ref.preview_id,
        created_at=ref.created_at,
        updated_at=ref.updated_at,
        last_access_time=ref.last_access_time,
    )

@@ -1,33 +1,33 @@
from app.assets.database.queries import (
    add_tags_to_asset_info,
    get_asset_info_by_id,
    add_tags_to_reference,
    get_reference_by_id,
    list_tags_with_usage,
    remove_tags_from_asset_info,
    remove_tags_from_reference,
)
from app.assets.services.schemas import AddTagsResult, RemoveTagsResult, TagUsage
from app.database.db import create_session


def apply_tags(
    asset_info_id: str,
    reference_id: str,
    tags: list[str],
    origin: str = "manual",
    owner_id: str = "",
) -> AddTagsResult:
    with create_session() as session:
        info_row = get_asset_info_by_id(session, asset_info_id=asset_info_id)
        if not info_row:
            raise ValueError(f"AssetInfo {asset_info_id} not found")
        if info_row.owner_id and info_row.owner_id != owner_id:
        ref_row = get_reference_by_id(session, reference_id=reference_id)
        if not ref_row:
            raise ValueError(f"AssetReference {reference_id} not found")
        if ref_row.owner_id and ref_row.owner_id != owner_id:
            raise PermissionError("not owner")

        data = add_tags_to_asset_info(
        data = add_tags_to_reference(
            session,
            asset_info_id=asset_info_id,
            reference_id=reference_id,
            tags=tags,
            origin=origin,
            create_if_missing=True,
            asset_info_row=info_row,
            reference_row=ref_row,
        )
        session.commit()

@@ -39,20 +39,20 @@ def apply_tags(


def remove_tags(
    asset_info_id: str,
    reference_id: str,
    tags: list[str],
    owner_id: str = "",
) -> RemoveTagsResult:
    with create_session() as session:
        info_row = get_asset_info_by_id(session, asset_info_id=asset_info_id)
        if not info_row:
            raise ValueError(f"AssetInfo {asset_info_id} not found")
        if info_row.owner_id and info_row.owner_id != owner_id:
        ref_row = get_reference_by_id(session, reference_id=reference_id)
        if not ref_row:
            raise ValueError(f"AssetReference {reference_id} not found")
        if ref_row.owner_id and ref_row.owner_id != owner_id:
            raise PermissionError("not owner")

        data = remove_tags_from_asset_info(
        data = remove_tags_from_reference(
            session,
            asset_info_id=asset_info_id,
            reference_id=reference_id,
            tags=tags,
        )
        session.commit()

@@ -3,24 +3,24 @@ import uuid
|
||||
import pytest
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.models import Asset, AssetInfo, AssetInfoMeta
|
||||
from app.assets.database.models import Asset, AssetReference, AssetReferenceMeta
|
||||
from app.assets.database.queries import (
|
||||
asset_info_exists_for_asset_id,
|
||||
get_asset_info_by_id,
|
||||
insert_asset_info,
|
||||
get_or_create_asset_info,
|
||||
update_asset_info_timestamps,
|
||||
list_asset_infos_page,
|
||||
fetch_asset_info_asset_and_tags,
|
||||
fetch_asset_info_and_asset,
|
||||
update_asset_info_access_time,
|
||||
set_asset_info_metadata,
|
||||
delete_asset_info_by_id,
|
||||
set_asset_info_preview,
|
||||
bulk_insert_asset_infos_ignore_conflicts,
|
||||
get_asset_info_ids_by_ids,
|
||||
reference_exists_for_asset_id,
|
||||
get_reference_by_id,
|
||||
insert_reference,
|
||||
get_or_create_reference,
|
||||
update_reference_timestamps,
|
||||
list_references_page,
|
||||
fetch_reference_asset_and_tags,
|
||||
fetch_reference_and_asset,
|
||||
update_reference_access_time,
|
||||
set_reference_metadata,
|
||||
delete_reference_by_id,
|
||||
set_reference_preview,
|
||||
bulk_insert_references_ignore_conflicts,
|
||||
get_reference_ids_by_ids,
|
||||
ensure_tags_exist,
|
||||
add_tags_to_asset_info,
|
||||
add_tags_to_reference,
|
||||
)
|
||||
from app.assets.helpers import get_utc_now
|
||||
|
||||
@@ -32,14 +32,14 @@ def _make_asset(session: Session, hash_val: str | None = None, size: int = 1024)
|
||||
return asset
|
||||
|
||||
|
||||
def _make_asset_info(
|
||||
def _make_reference(
|
||||
session: Session,
|
||||
asset: Asset,
|
||||
name: str = "test",
|
||||
owner_id: str = "",
|
||||
) -> AssetInfo:
|
||||
) -> AssetReference:
|
||||
now = get_utc_now()
|
||||
info = AssetInfo(
|
||||
ref = AssetReference(
|
||||
owner_id=owner_id,
|
||||
name=name,
|
||||
asset_id=asset.id,
|
||||
@@ -47,381 +47,386 @@ def _make_asset_info(
|
||||
updated_at=now,
|
||||
last_access_time=now,
|
||||
)
|
||||
session.add(info)
|
||||
session.add(ref)
|
||||
session.flush()
|
||||
return info
|
||||
return ref


-class TestAssetInfoExistsForAssetId:
-    def test_returns_false_when_no_info(self, session: Session):
+class TestReferenceExistsForAssetId:
+    def test_returns_false_when_no_reference(self, session: Session):
         asset = _make_asset(session, "hash1")
-        assert asset_info_exists_for_asset_id(session, asset_id=asset.id) is False
+        assert reference_exists_for_asset_id(session, asset_id=asset.id) is False

-    def test_returns_true_when_info_exists(self, session: Session):
+    def test_returns_true_when_reference_exists(self, session: Session):
         asset = _make_asset(session, "hash1")
-        _make_asset_info(session, asset)
-        assert asset_info_exists_for_asset_id(session, asset_id=asset.id) is True
+        _make_reference(session, asset)
+        assert reference_exists_for_asset_id(session, asset_id=asset.id) is True


-class TestGetAssetInfoById:
+class TestGetReferenceById:
     def test_returns_none_for_nonexistent(self, session: Session):
-        assert get_asset_info_by_id(session, asset_info_id="nonexistent") is None
+        assert get_reference_by_id(session, reference_id="nonexistent") is None

-    def test_returns_info(self, session: Session):
+    def test_returns_reference(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info = _make_asset_info(session, asset, name="myfile.txt")
+        ref = _make_reference(session, asset, name="myfile.txt")

-        result = get_asset_info_by_id(session, asset_info_id=info.id)
+        result = get_reference_by_id(session, reference_id=ref.id)
         assert result is not None
         assert result.name == "myfile.txt"


-class TestListAssetInfosPage:
+class TestListReferencesPage:
     def test_empty_db(self, session: Session):
-        infos, tag_map, total = list_asset_infos_page(session)
-        assert infos == []
+        refs, tag_map, total = list_references_page(session)
+        assert refs == []
         assert tag_map == {}
         assert total == 0

-    def test_returns_infos_with_tags(self, session: Session):
+    def test_returns_references_with_tags(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info = _make_asset_info(session, asset, name="test.bin")
+        ref = _make_reference(session, asset, name="test.bin")
         ensure_tags_exist(session, ["alpha", "beta"])
-        add_tags_to_asset_info(session, asset_info_id=info.id, tags=["alpha", "beta"])
+        add_tags_to_reference(session, reference_id=ref.id, tags=["alpha", "beta"])
         session.commit()

-        infos, tag_map, total = list_asset_infos_page(session)
-        assert len(infos) == 1
-        assert infos[0].id == info.id
-        assert set(tag_map[info.id]) == {"alpha", "beta"}
+        refs, tag_map, total = list_references_page(session)
+        assert len(refs) == 1
+        assert refs[0].id == ref.id
+        assert set(tag_map[ref.id]) == {"alpha", "beta"}
         assert total == 1

     def test_name_contains_filter(self, session: Session):
         asset = _make_asset(session, "hash1")
-        _make_asset_info(session, asset, name="model_v1.safetensors")
-        _make_asset_info(session, asset, name="config.json")
+        _make_reference(session, asset, name="model_v1.safetensors")
+        _make_reference(session, asset, name="config.json")
         session.commit()

-        infos, _, total = list_asset_infos_page(session, name_contains="model")
+        refs, _, total = list_references_page(session, name_contains="model")
         assert total == 1
-        assert infos[0].name == "model_v1.safetensors"
+        assert refs[0].name == "model_v1.safetensors"

     def test_owner_visibility(self, session: Session):
         asset = _make_asset(session, "hash1")
-        _make_asset_info(session, asset, name="public", owner_id="")
-        _make_asset_info(session, asset, name="private", owner_id="user1")
+        _make_reference(session, asset, name="public", owner_id="")
+        _make_reference(session, asset, name="private", owner_id="user1")
         session.commit()

         # Empty owner sees only public
-        infos, _, total = list_asset_infos_page(session, owner_id="")
+        refs, _, total = list_references_page(session, owner_id="")
         assert total == 1
-        assert infos[0].name == "public"
+        assert refs[0].name == "public"

         # Owner sees both
-        infos, _, total = list_asset_infos_page(session, owner_id="user1")
+        refs, _, total = list_references_page(session, owner_id="user1")
         assert total == 2
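
The two assertions above pin down the visibility rule: an empty owner_id means "public", and a named owner sees public rows plus their own. A minimal sketch of the predicate list_references_page presumably applies internally (the visible_to helper name is invented for illustration):

    from sqlalchemy import or_, select

    def visible_to(owner_id: str):
        # Public rows carry owner_id == ""; private rows carry a user id.
        if owner_id == "":
            return AssetReference.owner_id == ""
        return or_(AssetReference.owner_id == "", AssetReference.owner_id == owner_id)

    stmt = select(AssetReference).where(visible_to("user1"))  # public + user1's rows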

     def test_include_tags_filter(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info1 = _make_asset_info(session, asset, name="tagged")
-        _make_asset_info(session, asset, name="untagged")
+        ref1 = _make_reference(session, asset, name="tagged")
+        _make_reference(session, asset, name="untagged")
         ensure_tags_exist(session, ["wanted"])
-        add_tags_to_asset_info(session, asset_info_id=info1.id, tags=["wanted"])
+        add_tags_to_reference(session, reference_id=ref1.id, tags=["wanted"])
         session.commit()

-        infos, _, total = list_asset_infos_page(session, include_tags=["wanted"])
+        refs, _, total = list_references_page(session, include_tags=["wanted"])
         assert total == 1
-        assert infos[0].name == "tagged"
+        assert refs[0].name == "tagged"

     def test_exclude_tags_filter(self, session: Session):
         asset = _make_asset(session, "hash1")
-        _make_asset_info(session, asset, name="keep")
-        info_exclude = _make_asset_info(session, asset, name="exclude")
+        _make_reference(session, asset, name="keep")
+        ref_exclude = _make_reference(session, asset, name="exclude")
         ensure_tags_exist(session, ["bad"])
-        add_tags_to_asset_info(session, asset_info_id=info_exclude.id, tags=["bad"])
+        add_tags_to_reference(session, reference_id=ref_exclude.id, tags=["bad"])
         session.commit()

-        infos, _, total = list_asset_infos_page(session, exclude_tags=["bad"])
+        refs, _, total = list_references_page(session, exclude_tags=["bad"])
         assert total == 1
-        assert infos[0].name == "keep"
+        assert refs[0].name == "keep"

     def test_sorting(self, session: Session):
         asset = _make_asset(session, "hash1", size=100)
         asset2 = _make_asset(session, "hash2", size=500)
-        _make_asset_info(session, asset, name="small")
-        _make_asset_info(session, asset2, name="large")
+        _make_reference(session, asset, name="small")
+        _make_reference(session, asset2, name="large")
         session.commit()

-        infos, _, _ = list_asset_infos_page(session, sort="size", order="desc")
-        assert infos[0].name == "large"
+        refs, _, _ = list_references_page(session, sort="size", order="desc")
+        assert refs[0].name == "large"

-        infos, _, _ = list_asset_infos_page(session, sort="name", order="asc")
-        assert infos[0].name == "large"
+        refs, _, _ = list_references_page(session, sort="name", order="asc")
+        assert refs[0].name == "large"


-class TestFetchAssetInfoAssetAndTags:
+class TestFetchReferenceAssetAndTags:
     def test_returns_none_for_nonexistent(self, session: Session):
-        result = fetch_asset_info_asset_and_tags(session, "nonexistent")
+        result = fetch_reference_asset_and_tags(session, "nonexistent")
         assert result is None

     def test_returns_tuple(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info = _make_asset_info(session, asset, name="test.bin")
+        ref = _make_reference(session, asset, name="test.bin")
         ensure_tags_exist(session, ["tag1"])
-        add_tags_to_asset_info(session, asset_info_id=info.id, tags=["tag1"])
+        add_tags_to_reference(session, reference_id=ref.id, tags=["tag1"])
         session.commit()

-        result = fetch_asset_info_asset_and_tags(session, info.id)
+        result = fetch_reference_asset_and_tags(session, ref.id)
         assert result is not None
-        ret_info, ret_asset, ret_tags = result
-        assert ret_info.id == info.id
+        ret_ref, ret_asset, ret_tags = result
+        assert ret_ref.id == ref.id
         assert ret_asset.id == asset.id
         assert ret_tags == ["tag1"]


-class TestFetchAssetInfoAndAsset:
+class TestFetchReferenceAndAsset:
     def test_returns_none_for_nonexistent(self, session: Session):
-        result = fetch_asset_info_and_asset(session, asset_info_id="nonexistent")
+        result = fetch_reference_and_asset(session, reference_id="nonexistent")
         assert result is None

     def test_returns_tuple(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info = _make_asset_info(session, asset)
+        ref = _make_reference(session, asset)
         session.commit()

-        result = fetch_asset_info_and_asset(session, asset_info_id=info.id)
+        result = fetch_reference_and_asset(session, reference_id=ref.id)
         assert result is not None
-        ret_info, ret_asset = result
-        assert ret_info.id == info.id
+        ret_ref, ret_asset = result
+        assert ret_ref.id == ref.id
         assert ret_asset.id == asset.id


-class TestUpdateAssetInfoAccessTime:
+class TestUpdateReferenceAccessTime:
     def test_updates_last_access_time(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info = _make_asset_info(session, asset)
-        original_time = info.last_access_time
+        ref = _make_reference(session, asset)
+        original_time = ref.last_access_time
         session.commit()

         import time
         time.sleep(0.01)

-        update_asset_info_access_time(session, asset_info_id=info.id)
+        update_reference_access_time(session, reference_id=ref.id)
         session.commit()

-        session.refresh(info)
-        assert info.last_access_time > original_time
+        session.refresh(ref)
+        assert ref.last_access_time > original_time


-class TestDeleteAssetInfoById:
+class TestDeleteReferenceById:
     def test_deletes_existing(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info = _make_asset_info(session, asset)
+        ref = _make_reference(session, asset)
         session.commit()

-        result = delete_asset_info_by_id(session, asset_info_id=info.id, owner_id="")
+        result = delete_reference_by_id(session, reference_id=ref.id, owner_id="")
         assert result is True
-        assert get_asset_info_by_id(session, asset_info_id=info.id) is None
+        assert get_reference_by_id(session, reference_id=ref.id) is None

     def test_returns_false_for_nonexistent(self, session: Session):
-        result = delete_asset_info_by_id(session, asset_info_id="nonexistent", owner_id="")
+        result = delete_reference_by_id(session, reference_id="nonexistent", owner_id="")
         assert result is False

     def test_respects_owner_visibility(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info = _make_asset_info(session, asset, owner_id="user1")
+        ref = _make_reference(session, asset, owner_id="user1")
         session.commit()

-        result = delete_asset_info_by_id(session, asset_info_id=info.id, owner_id="user2")
+        result = delete_reference_by_id(session, reference_id=ref.id, owner_id="user2")
         assert result is False
-        assert get_asset_info_by_id(session, asset_info_id=info.id) is not None
+        assert get_reference_by_id(session, reference_id=ref.id) is not None


-class TestSetAssetInfoPreview:
+class TestSetReferencePreview:
     def test_sets_preview(self, session: Session):
         asset = _make_asset(session, "hash1")
         preview_asset = _make_asset(session, "preview_hash")
-        info = _make_asset_info(session, asset)
+        ref = _make_reference(session, asset)
         session.commit()

-        set_asset_info_preview(session, asset_info_id=info.id, preview_asset_id=preview_asset.id)
+        set_reference_preview(session, reference_id=ref.id, preview_asset_id=preview_asset.id)
         session.commit()

-        session.refresh(info)
-        assert info.preview_id == preview_asset.id
+        session.refresh(ref)
+        assert ref.preview_id == preview_asset.id

     def test_clears_preview(self, session: Session):
         asset = _make_asset(session, "hash1")
         preview_asset = _make_asset(session, "preview_hash")
-        info = _make_asset_info(session, asset)
-        info.preview_id = preview_asset.id
+        ref = _make_reference(session, asset)
+        ref.preview_id = preview_asset.id
         session.commit()

-        set_asset_info_preview(session, asset_info_id=info.id, preview_asset_id=None)
+        set_reference_preview(session, reference_id=ref.id, preview_asset_id=None)
         session.commit()

-        session.refresh(info)
-        assert info.preview_id is None
+        session.refresh(ref)
+        assert ref.preview_id is None

-    def test_raises_for_nonexistent_info(self, session: Session):
+    def test_raises_for_nonexistent_reference(self, session: Session):
         with pytest.raises(ValueError, match="not found"):
-            set_asset_info_preview(session, asset_info_id="nonexistent", preview_asset_id=None)
+            set_reference_preview(session, reference_id="nonexistent", preview_asset_id=None)

     def test_raises_for_nonexistent_preview(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info = _make_asset_info(session, asset)
+        ref = _make_reference(session, asset)
         session.commit()

         with pytest.raises(ValueError, match="Preview Asset"):
-            set_asset_info_preview(session, asset_info_id=info.id, preview_asset_id="nonexistent")
+            set_reference_preview(session, reference_id=ref.id, preview_asset_id="nonexistent")


-class TestInsertAssetInfo:
-    def test_creates_new_info(self, session: Session):
+class TestInsertReference:
+    def test_creates_new_reference(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info = insert_asset_info(
+        ref = insert_reference(
             session, asset_id=asset.id, owner_id="user1", name="test.bin"
         )
         session.commit()

-        assert info is not None
-        assert info.name == "test.bin"
-        assert info.owner_id == "user1"
+        assert ref is not None
+        assert ref.name == "test.bin"
+        assert ref.owner_id == "user1"

-    def test_returns_none_on_conflict(self, session: Session):
+    def test_allows_duplicate_names(self, session: Session):
         asset = _make_asset(session, "hash1")
-        insert_asset_info(session, asset_id=asset.id, owner_id="user1", name="dup.bin")
+        ref1 = insert_reference(session, asset_id=asset.id, owner_id="user1", name="dup.bin")
         session.commit()

-        # Attempt duplicate with same (asset_id, owner_id, name)
-        result = insert_asset_info(
+        # Duplicate names are now allowed
+        ref2 = insert_reference(
             session, asset_id=asset.id, owner_id="user1", name="dup.bin"
         )
-        assert result is None
+        session.commit()
+
+        assert ref1 is not None
+        assert ref2 is not None
+        assert ref1.id != ref2.id
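
This is the behavioural pivot of the whole refactor: the old insert returned None on a (asset_id, owner_id, name) conflict, while the new one happily creates a second row with the same name, because the only uniqueness left on asset_references is file_path. A sketch of what insert_reference can therefore reduce to (hypothetical body, inferred from the assertions above; assumes the module's AssetReference and get_utc_now are in scope):

    import uuid

    def insert_reference(session, *, asset_id: str, owner_id: str, name: str):
        # No name-based unique constraint remains, so a plain INSERT cannot
        # conflict here; UNIQUE(file_path) is the only guard, and file_path
        # is not set on this code path.
        now = get_utc_now()
        ref = AssetReference(
            id=str(uuid.uuid4()), asset_id=asset_id, owner_id=owner_id, name=name,
            created_at=now, updated_at=now, last_access_time=now,
        )
        session.add(ref)
        session.flush()
        return ref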


-class TestGetOrCreateAssetInfo:
-    def test_creates_new_info(self, session: Session):
+class TestGetOrCreateReference:
+    def test_creates_new_reference(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info, created = get_or_create_asset_info(
+        ref, created = get_or_create_reference(
             session, asset_id=asset.id, owner_id="user1", name="new.bin"
         )
         session.commit()

         assert created is True
-        assert info.name == "new.bin"
+        assert ref.name == "new.bin"

-    def test_returns_existing_info(self, session: Session):
+    def test_always_creates_new_reference(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info1, created1 = get_or_create_asset_info(
+        ref1, created1 = get_or_create_reference(
             session, asset_id=asset.id, owner_id="user1", name="existing.bin"
         )
         session.commit()

-        info2, created2 = get_or_create_asset_info(
+        # Duplicate names are allowed, so always creates new
+        ref2, created2 = get_or_create_reference(
             session, asset_id=asset.id, owner_id="user1", name="existing.bin"
         )
         session.commit()

         assert created1 is True
-        assert created2 is False
-        assert info1.id == info2.id
+        assert created2 is True
+        assert ref1.id != ref2.id


-class TestUpdateAssetInfoTimestamps:
+class TestUpdateReferenceTimestamps:
     def test_updates_timestamps(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info = _make_asset_info(session, asset)
-        original_updated_at = info.updated_at
+        ref = _make_reference(session, asset)
+        original_updated_at = ref.updated_at
         session.commit()

         time.sleep(0.01)
-        update_asset_info_timestamps(session, info)
+        update_reference_timestamps(session, ref)
         session.commit()

-        session.refresh(info)
-        assert info.updated_at > original_updated_at
+        session.refresh(ref)
+        assert ref.updated_at > original_updated_at

     def test_updates_preview_id(self, session: Session):
         asset = _make_asset(session, "hash1")
         preview_asset = _make_asset(session, "preview_hash")
-        info = _make_asset_info(session, asset)
+        ref = _make_reference(session, asset)
         session.commit()

-        update_asset_info_timestamps(session, info, preview_id=preview_asset.id)
+        update_reference_timestamps(session, ref, preview_id=preview_asset.id)
         session.commit()

-        session.refresh(info)
-        assert info.preview_id == preview_asset.id
+        session.refresh(ref)
+        assert ref.preview_id == preview_asset.id


-class TestSetAssetInfoMetadata:
+class TestSetReferenceMetadata:
     def test_sets_metadata(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info = _make_asset_info(session, asset)
+        ref = _make_reference(session, asset)
         session.commit()

-        set_asset_info_metadata(
-            session, asset_info_id=info.id, user_metadata={"key": "value"}
+        set_reference_metadata(
+            session, reference_id=ref.id, user_metadata={"key": "value"}
         )
         session.commit()

-        session.refresh(info)
-        assert info.user_metadata == {"key": "value"}
+        session.refresh(ref)
+        assert ref.user_metadata == {"key": "value"}
         # Check metadata table
-        meta = session.query(AssetInfoMeta).filter_by(asset_info_id=info.id).all()
+        meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
         assert len(meta) == 1
         assert meta[0].key == "key"
         assert meta[0].val_str == "value"

     def test_replaces_existing_metadata(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info = _make_asset_info(session, asset)
+        ref = _make_reference(session, asset)
         session.commit()

-        set_asset_info_metadata(
-            session, asset_info_id=info.id, user_metadata={"old": "data"}
+        set_reference_metadata(
+            session, reference_id=ref.id, user_metadata={"old": "data"}
         )
         session.commit()

-        set_asset_info_metadata(
-            session, asset_info_id=info.id, user_metadata={"new": "data"}
+        set_reference_metadata(
+            session, reference_id=ref.id, user_metadata={"new": "data"}
         )
         session.commit()

-        meta = session.query(AssetInfoMeta).filter_by(asset_info_id=info.id).all()
+        meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
         assert len(meta) == 1
         assert meta[0].key == "new"

     def test_clears_metadata_with_empty_dict(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info = _make_asset_info(session, asset)
+        ref = _make_reference(session, asset)
         session.commit()

-        set_asset_info_metadata(
-            session, asset_info_id=info.id, user_metadata={"key": "value"}
+        set_reference_metadata(
+            session, reference_id=ref.id, user_metadata={"key": "value"}
         )
         session.commit()

-        set_asset_info_metadata(
-            session, asset_info_id=info.id, user_metadata={}
+        set_reference_metadata(
+            session, reference_id=ref.id, user_metadata={}
         )
         session.commit()

-        session.refresh(info)
-        assert info.user_metadata == {}
-        meta = session.query(AssetInfoMeta).filter_by(asset_info_id=info.id).all()
+        session.refresh(ref)
+        assert ref.user_metadata == {}
+        meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
         assert len(meta) == 0

     def test_raises_for_nonexistent(self, session: Session):
         with pytest.raises(ValueError, match="not found"):
-            set_asset_info_metadata(
-                session, asset_info_id="nonexistent", user_metadata={"key": "value"}
+            set_reference_metadata(
+                session, reference_id="nonexistent", user_metadata={"key": "value"}
             )


-class TestBulkInsertAssetInfosIgnoreConflicts:
-    def test_inserts_multiple_infos(self, session: Session):
+class TestBulkInsertReferencesIgnoreConflicts:
+    def test_inserts_multiple_references(self, session: Session):
         asset = _make_asset(session, "hash1")
         now = get_utc_now()
         rows = [
@@ -448,15 +453,15 @@ class TestBulkInsertAssetInfosIgnoreConflicts:
                 "last_access_time": now,
             },
         ]
-        bulk_insert_asset_infos_ignore_conflicts(session, rows)
+        bulk_insert_references_ignore_conflicts(session, rows)
         session.commit()

-        infos = session.query(AssetInfo).all()
-        assert len(infos) == 2
+        refs = session.query(AssetReference).all()
+        assert len(refs) == 2

-    def test_ignores_conflicts(self, session: Session):
+    def test_allows_duplicate_names(self, session: Session):
         asset = _make_asset(session, "hash1")
-        _make_asset_info(session, asset, name="existing.bin", owner_id="")
+        _make_reference(session, asset, name="existing.bin", owner_id="")
         session.commit()

         now = get_utc_now()
@@ -484,28 +489,29 @@ class TestBulkInsertAssetInfosIgnoreConflicts:
                 "last_access_time": now,
             },
         ]
-        bulk_insert_asset_infos_ignore_conflicts(session, rows)
+        bulk_insert_references_ignore_conflicts(session, rows)
         session.commit()

-        infos = session.query(AssetInfo).all()
-        assert len(infos) == 2  # existing + new, not 3
+        # Duplicate names allowed, so all 3 rows exist
+        refs = session.query(AssetReference).all()
+        assert len(refs) == 3

     def test_empty_list_is_noop(self, session: Session):
-        bulk_insert_asset_infos_ignore_conflicts(session, [])
-        assert session.query(AssetInfo).count() == 0
+        bulk_insert_references_ignore_conflicts(session, [])
+        assert session.query(AssetReference).count() == 0


-class TestGetAssetInfoIdsByIds:
+class TestGetReferenceIdsByIds:
     def test_returns_existing_ids(self, session: Session):
         asset = _make_asset(session, "hash1")
-        info1 = _make_asset_info(session, asset, name="a.bin")
-        info2 = _make_asset_info(session, asset, name="b.bin")
+        ref1 = _make_reference(session, asset, name="a.bin")
+        ref2 = _make_reference(session, asset, name="b.bin")
         session.commit()

-        found = get_asset_info_ids_by_ids(session, [info1.id, info2.id, "nonexistent"])
+        found = get_reference_ids_by_ids(session, [ref1.id, ref2.id, "nonexistent"])

-        assert found == {info1.id, info2.id}
+        assert found == {ref1.id, ref2.id}

     def test_empty_list_returns_empty(self, session: Session):
-        found = get_asset_info_ids_by_ids(session, [])
+        found = get_reference_ids_by_ids(session, [])
         assert found == set()

@@ -1,21 +1,21 @@
-"""Tests for cache_state query functions."""
+"""Tests for cache_state (AssetReference file path) query functions."""
 import pytest
 from sqlalchemy.orm import Session

-from app.assets.database.models import Asset, AssetCacheState, AssetInfo
+from app.assets.database.models import Asset, AssetReference
 from app.assets.database.queries import (
-    list_cache_states_by_asset_id,
-    upsert_cache_state,
+    list_references_by_asset_id,
+    upsert_reference,
     get_unreferenced_unhashed_asset_ids,
     delete_assets_by_ids,
-    get_cache_states_for_prefixes,
+    get_references_for_prefixes,
     bulk_update_needs_verify,
-    delete_cache_states_by_ids,
+    delete_references_by_ids,
     delete_orphaned_seed_asset,
-    bulk_insert_cache_states_ignore_conflicts,
-    get_cache_states_by_paths_and_asset_ids,
-    mark_cache_states_missing_outside_prefixes,
-    restore_cache_states_by_paths,
+    bulk_insert_references_ignore_conflicts,
+    get_references_by_paths_and_asset_ids,
+    mark_references_missing_outside_prefixes,
+    restore_references_by_paths,
 )
 from app.assets.helpers import select_best_live_path, get_utc_now

@@ -27,49 +27,55 @@ def _make_asset(session: Session, hash_val: str | None = None, size: int = 1024)
     return asset


-def _make_cache_state(
+def _make_reference(
     session: Session,
     asset: Asset,
     file_path: str,
+    name: str = "test",
     mtime_ns: int | None = None,
     needs_verify: bool = False,
-) -> AssetCacheState:
-    state = AssetCacheState(
+) -> AssetReference:
+    now = get_utc_now()
+    ref = AssetReference(
         asset_id=asset.id,
         file_path=file_path,
+        name=name,
         mtime_ns=mtime_ns,
         needs_verify=needs_verify,
+        created_at=now,
+        updated_at=now,
+        last_access_time=now,
     )
-    session.add(state)
+    session.add(ref)
     session.flush()
-    return state
+    return ref


-class TestListCacheStatesByAssetId:
-    def test_returns_empty_for_no_states(self, session: Session):
+class TestListReferencesByAssetId:
+    def test_returns_empty_for_no_references(self, session: Session):
         asset = _make_asset(session, "hash1")
-        states = list_cache_states_by_asset_id(session, asset_id=asset.id)
-        assert list(states) == []
+        refs = list_references_by_asset_id(session, asset_id=asset.id)
+        assert list(refs) == []

-    def test_returns_states_for_asset(self, session: Session):
+    def test_returns_references_for_asset(self, session: Session):
         asset = _make_asset(session, "hash1")
-        _make_cache_state(session, asset, "/path/a.bin")
-        _make_cache_state(session, asset, "/path/b.bin")
+        _make_reference(session, asset, "/path/a.bin", name="a")
+        _make_reference(session, asset, "/path/b.bin", name="b")
         session.commit()

-        states = list_cache_states_by_asset_id(session, asset_id=asset.id)
-        paths = [s.file_path for s in states]
+        refs = list_references_by_asset_id(session, asset_id=asset.id)
+        paths = [r.file_path for r in refs]
         assert set(paths) == {"/path/a.bin", "/path/b.bin"}

-    def test_does_not_return_other_assets_states(self, session: Session):
+    def test_does_not_return_other_assets_references(self, session: Session):
         asset1 = _make_asset(session, "hash1")
         asset2 = _make_asset(session, "hash2")
-        _make_cache_state(session, asset1, "/path/asset1.bin")
-        _make_cache_state(session, asset2, "/path/asset2.bin")
+        _make_reference(session, asset1, "/path/asset1.bin", name="a1")
+        _make_reference(session, asset2, "/path/asset2.bin", name="a2")
         session.commit()

-        states = list_cache_states_by_asset_id(session, asset_id=asset1.id)
-        paths = [s.file_path for s in states]
+        refs = list_references_by_asset_id(session, asset_id=asset1.id)
+        paths = [r.file_path for r in refs]
         assert paths == ["/path/asset1.bin"]


@@ -80,10 +86,10 @@ class TestSelectBestLivePath:

     def test_returns_empty_when_no_files_exist(self, session: Session):
         asset = _make_asset(session, "hash1")
-        state = _make_cache_state(session, asset, "/nonexistent/path.bin")
+        ref = _make_reference(session, asset, "/nonexistent/path.bin")
         session.commit()

-        result = select_best_live_path([state])
+        result = select_best_live_path([ref])
         assert result == ""

     def test_prefers_verified_path(self, session: Session, tmp_path):
@@ -96,124 +102,125 @@ class TestSelectBestLivePath:
         unverified_file = tmp_path / "unverified.bin"
         unverified_file.write_bytes(b"data")

-        state_verified = _make_cache_state(
-            session, asset, str(verified_file), needs_verify=False
+        ref_verified = _make_reference(
+            session, asset, str(verified_file), name="verified", needs_verify=False
         )
-        state_unverified = _make_cache_state(
-            session, asset, str(unverified_file), needs_verify=True
+        ref_unverified = _make_reference(
+            session, asset, str(unverified_file), name="unverified", needs_verify=True
         )
         session.commit()

-        states = [state_unverified, state_verified]
-        result = select_best_live_path(states)
+        refs = [ref_unverified, ref_verified]
+        result = select_best_live_path(refs)
         assert result == str(verified_file)

     def test_falls_back_to_existing_unverified(self, session: Session, tmp_path):
-        """If all states need verification, return first existing path."""
+        """If all references need verification, return first existing path."""
         asset = _make_asset(session, "hash1")

         existing_file = tmp_path / "exists.bin"
         existing_file.write_bytes(b"data")

-        state = _make_cache_state(session, asset, str(existing_file), needs_verify=True)
+        ref = _make_reference(session, asset, str(existing_file), needs_verify=True)
         session.commit()

-        result = select_best_live_path([state])
+        result = select_best_live_path([ref])
         assert result == str(existing_file)


 class TestSelectBestLivePathWithMocking:
     def test_handles_missing_file_path_attr(self):
-        """Gracefully handle states with None file_path."""
+        """Gracefully handle references with None file_path."""

-        class MockState:
+        class MockRef:
             file_path = None
             needs_verify = False

-        result = select_best_live_path([MockState()])
+        result = select_best_live_path([MockRef()])
         assert result == ""


-class TestUpsertCacheState:
+class TestUpsertReference:
     @pytest.mark.parametrize(
         "initial_mtime,second_mtime,expect_created,expect_updated,final_mtime",
         [
-            # New state creation
+            # New reference creation
             (None, 12345, True, False, 12345),
-            # Existing state, same mtime - no update
+            # Existing reference, same mtime - no update
             (100, 100, False, False, 100),
-            # Existing state, different mtime - update
+            # Existing reference, different mtime - update
             (100, 200, False, True, 200),
         ],
-        ids=["new_state", "existing_no_change", "existing_update_mtime"],
+        ids=["new_reference", "existing_no_change", "existing_update_mtime"],
    )
     def test_upsert_scenarios(
         self, session: Session, initial_mtime, second_mtime, expect_created, expect_updated, final_mtime
     ):
         asset = _make_asset(session, "hash1")
         file_path = f"/path_{initial_mtime}_{second_mtime}.bin"
+        name = f"file_{initial_mtime}_{second_mtime}"

-        # Create initial state if needed
+        # Create initial reference if needed
         if initial_mtime is not None:
-            upsert_cache_state(session, asset_id=asset.id, file_path=file_path, mtime_ns=initial_mtime)
+            upsert_reference(session, asset_id=asset.id, file_path=file_path, name=name, mtime_ns=initial_mtime)
             session.commit()

         # The upsert call we're testing
-        created, updated = upsert_cache_state(
-            session, asset_id=asset.id, file_path=file_path, mtime_ns=second_mtime
+        created, updated = upsert_reference(
+            session, asset_id=asset.id, file_path=file_path, name=name, mtime_ns=second_mtime
         )
         session.commit()

         assert created is expect_created
         assert updated is expect_updated
-        state = session.query(AssetCacheState).filter_by(file_path=file_path).one()
-        assert state.mtime_ns == final_mtime
+        ref = session.query(AssetReference).filter_by(file_path=file_path).one()
+        assert ref.mtime_ns == final_mtime

-    def test_upsert_restores_missing_state(self, session: Session):
-        """Upserting a cache state that was marked missing should restore it."""
+    def test_upsert_restores_missing_reference(self, session: Session):
+        """Upserting a reference that was marked missing should restore it."""
         asset = _make_asset(session, "hash1")
         file_path = "/restored/file.bin"

-        state = _make_cache_state(session, asset, file_path, mtime_ns=100)
-        state.is_missing = True
+        ref = _make_reference(session, asset, file_path, mtime_ns=100)
+        ref.is_missing = True
         session.commit()

-        created, updated = upsert_cache_state(
-            session, asset_id=asset.id, file_path=file_path, mtime_ns=100
+        created, updated = upsert_reference(
+            session, asset_id=asset.id, file_path=file_path, name="restored", mtime_ns=100
         )
         session.commit()

         assert created is False
         assert updated is True
-        restored_state = session.query(AssetCacheState).filter_by(file_path=file_path).one()
-        assert restored_state.is_missing is False
+        restored_ref = session.query(AssetReference).filter_by(file_path=file_path).one()
+        assert restored_ref.is_missing is False
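
The parametrized cases plus the restore test define the (created, updated) contract precisely: a new path creates, an unchanged mtime is a no-op, a changed mtime updates, and re-seeing a row flagged is_missing counts as an update even when the mtime matches. A hypothetical implementation satisfying all four cases (assumes the module's AssetReference and get_utc_now):

    import uuid

    def upsert_reference(session, *, asset_id, file_path, name, mtime_ns):
        ref = session.query(AssetReference).filter_by(file_path=file_path).one_or_none()
        if ref is None:
            now = get_utc_now()
            session.add(AssetReference(
                id=str(uuid.uuid4()), asset_id=asset_id, file_path=file_path,
                name=name, mtime_ns=mtime_ns,
                created_at=now, updated_at=now, last_access_time=now,
            ))
            session.flush()
            return True, False
        changed = False
        if ref.mtime_ns != mtime_ns:
            ref.mtime_ns = mtime_ns
            changed = True
        if ref.is_missing:  # the file is back on disk, so restore it
            ref.is_missing = False
            changed = True
        return False, changed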


-class TestRestoreCacheStatesByPaths:
-    def test_restores_missing_states(self, session: Session):
+class TestRestoreReferencesByPaths:
+    def test_restores_missing_references(self, session: Session):
         asset = _make_asset(session, "hash1")
         missing_path = "/missing/file.bin"
         active_path = "/active/file.bin"

-        missing_state = _make_cache_state(session, asset, missing_path)
-        missing_state.is_missing = True
-        _make_cache_state(session, asset, active_path)
+        missing_ref = _make_reference(session, asset, missing_path, name="missing")
+        missing_ref.is_missing = True
+        _make_reference(session, asset, active_path, name="active")
         session.commit()

-        restored = restore_cache_states_by_paths(session, [missing_path])
+        restored = restore_references_by_paths(session, [missing_path])
         session.commit()

         assert restored == 1
-        state = session.query(AssetCacheState).filter_by(file_path=missing_path).one()
-        assert state.is_missing is False
+        ref = session.query(AssetReference).filter_by(file_path=missing_path).one()
+        assert ref.is_missing is False

     def test_empty_list_restores_nothing(self, session: Session):
-        restored = restore_cache_states_by_paths(session, [])
+        restored = restore_references_by_paths(session, [])
         assert restored == 0


-class TestMarkCacheStatesMissingOutsidePrefixes:
-    def test_marks_states_missing_outside_prefixes(self, session: Session, tmp_path):
+class TestMarkReferencesMissingOutsidePrefixes:
+    def test_marks_references_missing_outside_prefixes(self, session: Session, tmp_path):
         asset = _make_asset(session, "hash1")
         valid_dir = tmp_path / "valid"
         valid_dir.mkdir()
@@ -223,63 +230,58 @@ class TestMarkCacheStatesMissingOutsidePrefixes:
         valid_path = str(valid_dir / "file.bin")
         invalid_path = str(invalid_dir / "file.bin")

-        _make_cache_state(session, asset, valid_path)
-        _make_cache_state(session, asset, invalid_path)
+        _make_reference(session, asset, valid_path, name="valid")
+        _make_reference(session, asset, invalid_path, name="invalid")
         session.commit()

-        marked = mark_cache_states_missing_outside_prefixes(session, [str(valid_dir)])
+        marked = mark_references_missing_outside_prefixes(session, [str(valid_dir)])
         session.commit()

         assert marked == 1
-        all_states = session.query(AssetCacheState).all()
-        assert len(all_states) == 2
+        all_refs = session.query(AssetReference).all()
+        assert len(all_refs) == 2

-        valid_state = next(s for s in all_states if s.file_path == valid_path)
-        invalid_state = next(s for s in all_states if s.file_path == invalid_path)
-        assert valid_state.is_missing is False
-        assert invalid_state.is_missing is True
+        valid_ref = next(r for r in all_refs if r.file_path == valid_path)
+        invalid_ref = next(r for r in all_refs if r.file_path == invalid_path)
+        assert valid_ref.is_missing is False
+        assert invalid_ref.is_missing is True

     def test_empty_prefixes_marks_nothing(self, session: Session):
         asset = _make_asset(session, "hash1")
-        _make_cache_state(session, asset, "/some/path.bin")
+        _make_reference(session, asset, "/some/path.bin")
         session.commit()

-        marked = mark_cache_states_missing_outside_prefixes(session, [])
+        marked = mark_references_missing_outside_prefixes(session, [])

         assert marked == 0


 class TestGetUnreferencedUnhashedAssetIds:
     def test_returns_unreferenced_unhashed_assets(self, session: Session):
-        # Unhashed asset (hash=None) with no cache states
-        no_states = _make_asset(session, hash_val=None)
-        # Unhashed asset with active cache state (not unreferenced)
-        with_active_state = _make_asset(session, hash_val=None)
-        _make_cache_state(session, with_active_state, "/has/state.bin")
-        # Unhashed asset with only missing cache state (should be unreferenced)
-        with_missing_state = _make_asset(session, hash_val=None)
-        missing_state = _make_cache_state(session, with_missing_state, "/missing/state.bin")
-        missing_state.is_missing = True
+        # Unhashed asset (hash=None) with no references (no file_path)
+        no_refs = _make_asset(session, hash_val=None)
+        # Unhashed asset with active reference (not unreferenced)
+        with_active_ref = _make_asset(session, hash_val=None)
+        _make_reference(session, with_active_ref, "/has/ref.bin", name="has_ref")
+        # Unhashed asset with only missing reference (should be unreferenced)
+        with_missing_ref = _make_asset(session, hash_val=None)
+        missing_ref = _make_reference(session, with_missing_ref, "/missing/ref.bin", name="missing_ref")
+        missing_ref.is_missing = True
         # Regular asset (hash not None) - should not be returned
         _make_asset(session, hash_val="blake3:regular")
         session.commit()

         unreferenced = get_unreferenced_unhashed_asset_ids(session)

-        assert no_states.id in unreferenced
-        assert with_missing_state.id in unreferenced
-        assert with_active_state.id not in unreferenced
+        assert no_refs.id in unreferenced
+        assert with_missing_ref.id in unreferenced
+        assert with_active_ref.id not in unreferenced
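
"Unreferenced" here means no reference whose is_missing flag is clear, so an asset whose only reference has gone missing still qualifies. One way to express that as a single query (a sketch; the Asset.hash column name is inferred from the hash_val helper argument and is an assumption):

    from sqlalchemy import select

    def get_unreferenced_unhashed_asset_ids(session) -> set[str]:
        # Assets that still have at least one live (non-missing) reference.
        live = select(AssetReference.asset_id).where(AssetReference.is_missing.is_(False))
        stmt = select(Asset.id).where(Asset.hash.is_(None)).where(Asset.id.not_in(live))
        return set(session.scalars(stmt))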


 class TestDeleteAssetsByIds:
-    def test_deletes_assets_and_infos(self, session: Session):
+    def test_deletes_assets_and_references(self, session: Session):
         asset = _make_asset(session, "hash1")
-        now = get_utc_now()
-        info = AssetInfo(
-            owner_id="", name="test", asset_id=asset.id,
-            created_at=now, updated_at=now, last_access_time=now
-        )
-        session.add(info)
+        _make_reference(session, asset, "/test/path.bin", name="test")
         session.commit()

         deleted = delete_assets_by_ids(session, [asset.id])
@@ -287,7 +289,7 @@ class TestDeleteAssetsByIds:

         assert deleted == 1
         assert session.query(Asset).count() == 0
-        assert session.query(AssetInfo).count() == 0
+        assert session.query(AssetReference).count() == 0

     def test_empty_list_deletes_nothing(self, session: Session):
         _make_asset(session, "hash1")
@@ -299,8 +301,8 @@ class TestDeleteAssetsByIds:
         assert session.query(Asset).count() == 1


-class TestGetCacheStatesForPrefixes:
-    def test_returns_states_matching_prefix(self, session: Session, tmp_path):
+class TestGetReferencesForPrefixes:
+    def test_returns_references_matching_prefix(self, session: Session, tmp_path):
         asset = _make_asset(session, "hash1")
         dir1 = tmp_path / "dir1"
         dir1.mkdir()
@@ -310,21 +312,21 @@ class TestGetCacheStatesForPrefixes:
         path1 = str(dir1 / "file.bin")
         path2 = str(dir2 / "file.bin")

-        _make_cache_state(session, asset, path1, mtime_ns=100)
-        _make_cache_state(session, asset, path2, mtime_ns=200)
+        _make_reference(session, asset, path1, name="file1", mtime_ns=100)
+        _make_reference(session, asset, path2, name="file2", mtime_ns=200)
         session.commit()

-        rows = get_cache_states_for_prefixes(session, [str(dir1)])
+        rows = get_references_for_prefixes(session, [str(dir1)])

         assert len(rows) == 1
         assert rows[0].file_path == path1

     def test_empty_prefixes_returns_empty(self, session: Session):
         asset = _make_asset(session, "hash1")
-        _make_cache_state(session, asset, "/some/path.bin")
+        _make_reference(session, asset, "/some/path.bin")
         session.commit()

-        rows = get_cache_states_for_prefixes(session, [])
+        rows = get_references_for_prefixes(session, [])

         assert rows == []

@@ -332,39 +334,39 @@ class TestGetCacheStatesForPrefixes:
 class TestBulkSetNeedsVerify:
     def test_sets_needs_verify_flag(self, session: Session):
         asset = _make_asset(session, "hash1")
-        state1 = _make_cache_state(session, asset, "/path1.bin", needs_verify=False)
-        state2 = _make_cache_state(session, asset, "/path2.bin", needs_verify=False)
+        ref1 = _make_reference(session, asset, "/path1.bin", needs_verify=False)
+        ref2 = _make_reference(session, asset, "/path2.bin", needs_verify=False)
         session.commit()

-        updated = bulk_update_needs_verify(session, [state1.id, state2.id], True)
+        updated = bulk_update_needs_verify(session, [ref1.id, ref2.id], True)
         session.commit()

         assert updated == 2
-        session.refresh(state1)
-        session.refresh(state2)
-        assert state1.needs_verify is True
-        assert state2.needs_verify is True
+        session.refresh(ref1)
+        session.refresh(ref2)
+        assert ref1.needs_verify is True
+        assert ref2.needs_verify is True

     def test_empty_list_updates_nothing(self, session: Session):
         updated = bulk_update_needs_verify(session, [], True)
         assert updated == 0


-class TestDeleteCacheStatesByIds:
-    def test_deletes_states_by_id(self, session: Session):
+class TestDeleteReferencesByIds:
+    def test_deletes_references_by_id(self, session: Session):
         asset = _make_asset(session, "hash1")
-        state1 = _make_cache_state(session, asset, "/path1.bin")
-        _make_cache_state(session, asset, "/path2.bin")
+        ref1 = _make_reference(session, asset, "/path1.bin")
+        _make_reference(session, asset, "/path2.bin")
         session.commit()

-        deleted = delete_cache_states_by_ids(session, [state1.id])
+        deleted = delete_references_by_ids(session, [ref1.id])
         session.commit()

         assert deleted == 1
-        assert session.query(AssetCacheState).count() == 1
+        assert session.query(AssetReference).count() == 1

     def test_empty_list_deletes_nothing(self, session: Session):
-        deleted = delete_cache_states_by_ids(session, [])
+        deleted = delete_references_by_ids(session, [])
         assert deleted == 0


@@ -384,12 +386,7 @@ class TestDeleteOrphanedSeedAsset:
         if create_asset:
             asset = _make_asset(session, hash_val=None)
             asset_id = asset.id
-            now = get_utc_now()
-            info = AssetInfo(
-                owner_id="", name="test", asset_id=asset.id,
-                created_at=now, updated_at=now, last_access_time=now
-            )
-            session.add(info)
+            _make_reference(session, asset, "/test/path.bin", name="test")
             session.commit()

         deleted = delete_orphaned_seed_asset(session, asset_id)
@@ -400,53 +397,87 @@ class TestDeleteOrphanedSeedAsset:
         assert session.query(Asset).count() == expected_count


-class TestBulkInsertCacheStatesIgnoreConflicts:
-    def test_inserts_multiple_states(self, session: Session):
+class TestBulkInsertReferencesIgnoreConflicts:
+    def test_inserts_multiple_references(self, session: Session):
         asset = _make_asset(session, "hash1")
+        now = get_utc_now()
         rows = [
-            {"asset_id": asset.id, "file_path": "/bulk1.bin", "mtime_ns": 100},
-            {"asset_id": asset.id, "file_path": "/bulk2.bin", "mtime_ns": 200},
+            {
+                "asset_id": asset.id,
+                "file_path": "/bulk1.bin",
+                "name": "bulk1",
+                "mtime_ns": 100,
+                "created_at": now,
+                "updated_at": now,
+                "last_access_time": now,
+            },
+            {
+                "asset_id": asset.id,
+                "file_path": "/bulk2.bin",
+                "name": "bulk2",
+                "mtime_ns": 200,
+                "created_at": now,
+                "updated_at": now,
+                "last_access_time": now,
+            },
         ]
-        bulk_insert_cache_states_ignore_conflicts(session, rows)
+        bulk_insert_references_ignore_conflicts(session, rows)
         session.commit()

-        assert session.query(AssetCacheState).count() == 2
+        assert session.query(AssetReference).count() == 2

     def test_ignores_conflicts(self, session: Session):
         asset = _make_asset(session, "hash1")
-        _make_cache_state(session, asset, "/existing.bin", mtime_ns=100)
+        _make_reference(session, asset, "/existing.bin", mtime_ns=100)
         session.commit()

+        now = get_utc_now()
         rows = [
-            {"asset_id": asset.id, "file_path": "/existing.bin", "mtime_ns": 999},
-            {"asset_id": asset.id, "file_path": "/new.bin", "mtime_ns": 200},
+            {
+                "asset_id": asset.id,
+                "file_path": "/existing.bin",
+                "name": "existing",
+                "mtime_ns": 999,
+                "created_at": now,
+                "updated_at": now,
+                "last_access_time": now,
+            },
+            {
+                "asset_id": asset.id,
+                "file_path": "/new.bin",
+                "name": "new",
+                "mtime_ns": 200,
+                "created_at": now,
+                "updated_at": now,
+                "last_access_time": now,
+            },
         ]
-        bulk_insert_cache_states_ignore_conflicts(session, rows)
+        bulk_insert_references_ignore_conflicts(session, rows)
         session.commit()

-        assert session.query(AssetCacheState).count() == 2
-        existing = session.query(AssetCacheState).filter_by(file_path="/existing.bin").one()
+        assert session.query(AssetReference).count() == 2
+        existing = session.query(AssetReference).filter_by(file_path="/existing.bin").one()
         assert existing.mtime_ns == 100  # Original value preserved

     def test_empty_list_is_noop(self, session: Session):
-        bulk_insert_cache_states_ignore_conflicts(session, [])
-        assert session.query(AssetCacheState).count() == 0
+        bulk_insert_references_ignore_conflicts(session, [])
+        assert session.query(AssetReference).count() == 0
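
The conflict test doubles as a regression test for the new UNIQUE(file_path) constraint: the row re-using /existing.bin is silently dropped and the original mtime_ns survives. On SQLite that behaviour maps naturally onto INSERT ... ON CONFLICT DO NOTHING; a sketch under that assumption (id generation is a guess, since the test rows omit it):

    import uuid
    from sqlalchemy.dialects.sqlite import insert as sqlite_insert

    def bulk_insert_references_ignore_conflicts(session, rows: list[dict]) -> None:
        if not rows:
            return  # matches test_empty_list_is_noop
        rows = [{"id": str(uuid.uuid4()), **row} for row in rows]
        stmt = sqlite_insert(AssetReference).on_conflict_do_nothing(
            index_elements=["file_path"]
        )
        session.execute(stmt, rows)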


-class TestGetCacheStatesByPathsAndAssetIds:
+class TestGetReferencesByPathsAndAssetIds:
     def test_returns_matching_paths(self, session: Session):
         asset1 = _make_asset(session, "hash1")
         asset2 = _make_asset(session, "hash2")

-        _make_cache_state(session, asset1, "/path1.bin")
-        _make_cache_state(session, asset2, "/path2.bin")
+        _make_reference(session, asset1, "/path1.bin")
+        _make_reference(session, asset2, "/path2.bin")
         session.commit()

         path_to_asset = {
             "/path1.bin": asset1.id,
             "/path2.bin": asset2.id,
         }
-        winners = get_cache_states_by_paths_and_asset_ids(session, path_to_asset)
+        winners = get_references_by_paths_and_asset_ids(session, path_to_asset)

         assert winners == {"/path1.bin", "/path2.bin"}

@@ -454,15 +485,15 @@ class TestGetCacheStatesByPathsAndAssetIds:
         asset1 = _make_asset(session, "hash1")
         asset2 = _make_asset(session, "hash2")

-        _make_cache_state(session, asset1, "/path1.bin")
+        _make_reference(session, asset1, "/path1.bin")
         session.commit()

         # Path exists but with different asset_id
         path_to_asset = {"/path1.bin": asset2.id}
-        winners = get_cache_states_by_paths_and_asset_ids(session, path_to_asset)
+        winners = get_references_by_paths_and_asset_ids(session, path_to_asset)

         assert winners == set()

     def test_empty_dict_returns_empty(self, session: Session):
-        winners = get_cache_states_by_paths_and_asset_ids(session, {})
+        winners = get_references_by_paths_and_asset_ids(session, {})
         assert winners == set()

@@ -1,10 +1,10 @@
-"""Tests for metadata filtering logic in asset_info queries."""
+"""Tests for metadata filtering logic in asset_reference queries."""
 import pytest
 from sqlalchemy.orm import Session

-from app.assets.database.models import Asset, AssetInfo, AssetInfoMeta
-from app.assets.database.queries import list_asset_infos_page
-from app.assets.database.queries.asset_info import convert_metadata_to_rows
+from app.assets.database.models import Asset, AssetReference, AssetReferenceMeta
+from app.assets.database.queries import list_references_page
+from app.assets.database.queries.asset_reference import convert_metadata_to_rows
 from app.assets.helpers import get_utc_now


@@ -15,14 +15,14 @@ def _make_asset(session: Session, hash_val: str) -> Asset:
     return asset


-def _make_asset_info(
+def _make_reference(
     session: Session,
     asset: Asset,
     name: str,
     metadata: dict | None = None,
-) -> AssetInfo:
+) -> AssetReference:
     now = get_utc_now()
-    info = AssetInfo(
+    ref = AssetReference(
         owner_id="",
         name=name,
         asset_id=asset.id,
@@ -31,14 +31,14 @@ def _make_asset_info(
         updated_at=now,
         last_access_time=now,
     )
-    session.add(info)
+    session.add(ref)
     session.flush()

     if metadata:
         for key, val in metadata.items():
             for row in convert_metadata_to_rows(key, val):
-                meta_row = AssetInfoMeta(
-                    asset_info_id=info.id,
+                meta_row = AssetReferenceMeta(
+                    asset_reference_id=ref.id,
                     key=row["key"],
                     ordinal=row.get("ordinal", 0),
                     val_str=row.get("val_str"),
@@ -49,7 +49,7 @@ def _make_asset_info(
                 session.add(meta_row)
         session.flush()

-    return info
+    return ref
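
convert_metadata_to_rows is only exercised indirectly here: strings land in val_str (as the earlier set-metadata test asserts), list values fan out one row per element with an ordinal, and numeric filters such as {"version": 2} imply a separate numeric column. A sketch of that encoding (val_num and val_bool are guessed names; only key, ordinal, and val_str appear in this diff):

    def convert_metadata_to_rows(key: str, val) -> list[dict]:
        vals = val if isinstance(val, list) else [val]
        rows = []
        for i, v in enumerate(vals):
            row = {"key": key, "ordinal": i}
            if isinstance(v, bool):  # check bool before int: bool subclasses int
                row["val_bool"] = v
            elif isinstance(v, (int, float)):
                row["val_num"] = v
            elif isinstance(v, str):
                row["val_str"] = v
            rows.append(row)
        return rows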
|
||||
|
||||
|
||||
class TestMetadataFilterByType:
|
||||
@@ -75,15 +75,15 @@ class TestMetadataFilterByType:
|
||||
self, session: Session, match_meta, nomatch_meta, filter_key, filter_val
|
||||
):
|
||||
asset = _make_asset(session, "hash1")
|
||||
_make_asset_info(session, asset, "match", match_meta)
|
||||
_make_asset_info(session, asset, "nomatch", nomatch_meta)
|
||||
_make_reference(session, asset, "match", match_meta)
|
||||
_make_reference(session, asset, "nomatch", nomatch_meta)
|
||||
session.commit()
|
||||
|
||||
infos, _, total = list_asset_infos_page(
|
||||
refs, _, total = list_references_page(
|
||||
session, metadata_filter={filter_key: filter_val}
|
||||
)
|
||||
assert total == 1
|
||||
assert infos[0].name == "match"
|
||||
assert refs[0].name == "match"
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"stored_meta,filter_key,filter_val",
|
||||
@@ -101,10 +101,10 @@ class TestMetadataFilterByType:
|
||||
self, session: Session, stored_meta, filter_key, filter_val
|
||||
):
|
||||
asset = _make_asset(session, "hash1")
|
||||
_make_asset_info(session, asset, "item", stored_meta)
|
||||
_make_reference(session, asset, "item", stored_meta)
|
||||
session.commit()
|
||||
|
||||
infos, _, total = list_asset_infos_page(
|
||||
refs, _, total = list_references_page(
|
||||
session, metadata_filter={filter_key: filter_val}
|
||||
)
|
||||
assert total == 0
|
||||
@@ -127,13 +127,13 @@ class TestMetadataFilterNull:
|
||||
self, session: Session, match_name, match_meta, nomatch_name, nomatch_meta, filter_key
|
||||
):
|
||||
asset = _make_asset(session, "hash1")
|
||||
_make_asset_info(session, asset, match_name, match_meta)
|
||||
_make_asset_info(session, asset, nomatch_name, nomatch_meta)
|
||||
_make_reference(session, asset, match_name, match_meta)
|
||||
_make_reference(session, asset, nomatch_name, nomatch_meta)
|
||||
session.commit()
|
||||
|
||||
infos, _, total = list_asset_infos_page(session, metadata_filter={filter_key: None})
|
||||
refs, _, total = list_references_page(session, metadata_filter={filter_key: None})
|
||||
assert total == 1
|
||||
assert infos[0].name == match_name
|
||||
assert refs[0].name == match_name
|
||||
|
||||
|
||||
class TestMetadataFilterList:
|
||||
@@ -142,14 +142,14 @@ class TestMetadataFilterList:
|
||||
def test_filter_by_list_matches_any(self, session: Session):
|
||||
"""List values should match ANY of the values (OR)."""
|
||||
asset = _make_asset(session, "hash1")
|
||||
_make_asset_info(session, asset, "cat_a", {"category": "a"})
|
||||
_make_asset_info(session, asset, "cat_b", {"category": "b"})
|
||||
_make_asset_info(session, asset, "cat_c", {"category": "c"})
|
||||
_make_reference(session, asset, "cat_a", {"category": "a"})
|
||||
_make_reference(session, asset, "cat_b", {"category": "b"})
|
||||
_make_reference(session, asset, "cat_c", {"category": "c"})
|
||||
session.commit()
|
||||
|
||||
infos, _, total = list_asset_infos_page(session, metadata_filter={"category": ["a", "b"]})
|
||||
refs, _, total = list_references_page(session, metadata_filter={"category": ["a", "b"]})
|
||||
assert total == 2
|
||||
names = {i.name for i in infos}
|
||||
names = {r.name for r in refs}
|
||||
assert names == {"cat_a", "cat_b"}
|
||||
|
||||
|
||||
@@ -159,16 +159,16 @@ class TestMetadataFilterMultipleKeys:
|
||||
def test_multiple_keys_must_all_match(self, session: Session):
|
||||
"""Multiple keys should ALL match (AND)."""
|
||||
asset = _make_asset(session, "hash1")
|
||||
_make_asset_info(session, asset, "match", {"type": "model", "version": 2})
|
||||
_make_asset_info(session, asset, "wrong_type", {"type": "config", "version": 2})
|
||||
_make_asset_info(session, asset, "wrong_version", {"type": "model", "version": 1})
|
||||
_make_reference(session, asset, "match", {"type": "model", "version": 2})
|
||||
_make_reference(session, asset, "wrong_type", {"type": "config", "version": 2})
|
||||
_make_reference(session, asset, "wrong_version", {"type": "model", "version": 1})
|
||||
session.commit()
|
||||
|
||||
infos, _, total = list_asset_infos_page(
|
||||
refs, _, total = list_references_page(
|
||||
session, metadata_filter={"type": "model", "version": 2}
|
||||
)
|
||||
assert total == 1
|
||||
assert infos[0].name == "match"
|
||||
assert refs[0].name == "match"
|
||||
|
||||
|
||||
class TestMetadataFilterEmptyDict:
|
||||
@@ -176,9 +176,9 @@ class TestMetadataFilterEmptyDict:
|
||||
|
||||
def test_empty_filter_returns_all(self, session: Session):
|
||||
asset = _make_asset(session, "hash1")
|
||||
_make_asset_info(session, asset, "a", {"key": "val"})
|
||||
_make_asset_info(session, asset, "b", {})
|
||||
_make_reference(session, asset, "a", {"key": "val"})
|
||||
_make_reference(session, asset, "b", {})
|
||||
session.commit()
|
||||
|
||||
infos, _, total = list_asset_infos_page(session, metadata_filter={})
|
||||
refs, _, total = list_references_page(session, metadata_filter={})
|
||||
assert total == 2
|
||||
|
||||
@@ -1,13 +1,13 @@
import pytest
from sqlalchemy.orm import Session

from app.assets.database.models import Asset, AssetInfo, AssetInfoTag, AssetInfoMeta, Tag
from app.assets.database.models import Asset, AssetReference, AssetReferenceTag, AssetReferenceMeta, Tag
from app.assets.database.queries import (
    ensure_tags_exist,
    get_asset_tags,
    set_asset_info_tags,
    add_tags_to_asset_info,
    remove_tags_from_asset_info,
    get_reference_tags,
    set_reference_tags,
    add_tags_to_reference,
    remove_tags_from_reference,
    add_missing_tag_for_asset_id,
    remove_missing_tag_for_asset_id,
    list_tags_with_usage,
@@ -23,9 +23,9 @@ def _make_asset(session: Session, hash_val: str | None = None) -> Asset:
    return asset


def _make_asset_info(session: Session, asset: Asset, name: str = "test", owner_id: str = "") -> AssetInfo:
def _make_reference(session: Session, asset: Asset, name: str = "test", owner_id: str = "") -> AssetReference:
    now = get_utc_now()
    info = AssetInfo(
    ref = AssetReference(
        owner_id=owner_id,
        name=name,
        asset_id=asset.id,
@@ -33,9 +33,9 @@ def _make_asset_info(session: Session, asset: Asset, name: str = "test", owner_i
        updated_at=now,
        last_access_time=now,
    )
    session.add(info)
    session.add(ref)
    session.flush()
    return info
    return ref


class TestEnsureTagsExist:
@@ -73,35 +73,35 @@ class TestEnsureTagsExist:
        assert tag.tag_type == "system"


class TestGetAssetTags:
class TestGetReferenceTags:
    def test_returns_empty_for_no_tags(self, session: Session):
        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)

        tags = get_asset_tags(session, asset_info_id=info.id)
        tags = get_reference_tags(session, reference_id=ref.id)
        assert tags == []

    def test_returns_tags_for_asset(self, session: Session):
    def test_returns_tags_for_reference(self, session: Session):
        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)

        ensure_tags_exist(session, ["tag1", "tag2"])
        session.add_all([
            AssetInfoTag(asset_info_id=info.id, tag_name="tag1", origin="manual", added_at=get_utc_now()),
            AssetInfoTag(asset_info_id=info.id, tag_name="tag2", origin="manual", added_at=get_utc_now()),
            AssetReferenceTag(asset_reference_id=ref.id, tag_name="tag1", origin="manual", added_at=get_utc_now()),
            AssetReferenceTag(asset_reference_id=ref.id, tag_name="tag2", origin="manual", added_at=get_utc_now()),
        ])
        session.flush()

        tags = get_asset_tags(session, asset_info_id=info.id)
        tags = get_reference_tags(session, reference_id=ref.id)
        assert set(tags) == {"tag1", "tag2"}


class TestSetAssetInfoTags:
class TestSetReferenceTags:
    def test_adds_new_tags(self, session: Session):
        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)

        result = set_asset_info_tags(session, asset_info_id=info.id, tags=["a", "b"])
        result = set_reference_tags(session, reference_id=ref.id, tags=["a", "b"])
        session.commit()

        assert set(result["added"]) == {"a", "b"}
@@ -110,10 +110,10 @@ class TestSetAssetInfoTags:

    def test_removes_old_tags(self, session: Session):
        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)

        set_asset_info_tags(session, asset_info_id=info.id, tags=["a", "b", "c"])
        result = set_asset_info_tags(session, asset_info_id=info.id, tags=["a"])
        set_reference_tags(session, reference_id=ref.id, tags=["a", "b", "c"])
        result = set_reference_tags(session, reference_id=ref.id, tags=["a"])
        session.commit()

        assert result["added"] == []
@@ -122,10 +122,10 @@ class TestSetAssetInfoTags:

    def test_replaces_tags(self, session: Session):
        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)

        set_asset_info_tags(session, asset_info_id=info.id, tags=["a", "b"])
        result = set_asset_info_tags(session, asset_info_id=info.id, tags=["b", "c"])
        set_reference_tags(session, reference_id=ref.id, tags=["a", "b"])
        result = set_reference_tags(session, reference_id=ref.id, tags=["b", "c"])
        session.commit()

        assert result["added"] == ["c"]
@@ -133,12 +133,12 @@ class TestSetAssetInfoTags:
        assert set(result["total"]) == {"b", "c"}


class TestAddTagsToAssetInfo:
class TestAddTagsToReference:
    def test_adds_tags(self, session: Session):
        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)

        result = add_tags_to_asset_info(session, asset_info_id=info.id, tags=["x", "y"])
        result = add_tags_to_reference(session, reference_id=ref.id, tags=["x", "y"])
        session.commit()

        assert set(result["added"]) == {"x", "y"}
@@ -146,27 +146,27 @@ class TestAddTagsToAssetInfo:

    def test_reports_already_present(self, session: Session):
        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)

        add_tags_to_asset_info(session, asset_info_id=info.id, tags=["x"])
        result = add_tags_to_asset_info(session, asset_info_id=info.id, tags=["x", "y"])
        add_tags_to_reference(session, reference_id=ref.id, tags=["x"])
        result = add_tags_to_reference(session, reference_id=ref.id, tags=["x", "y"])
        session.commit()

        assert result["added"] == ["y"]
        assert result["already_present"] == ["x"]

    def test_raises_for_missing_asset_info(self, session: Session):
    def test_raises_for_missing_reference(self, session: Session):
        with pytest.raises(ValueError, match="not found"):
            add_tags_to_asset_info(session, asset_info_id="nonexistent", tags=["x"])
            add_tags_to_reference(session, reference_id="nonexistent", tags=["x"])


class TestRemoveTagsFromAssetInfo:
class TestRemoveTagsFromReference:
    def test_removes_tags(self, session: Session):
        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)

        add_tags_to_asset_info(session, asset_info_id=info.id, tags=["a", "b", "c"])
        result = remove_tags_from_asset_info(session, asset_info_id=info.id, tags=["a", "b"])
        add_tags_to_reference(session, reference_id=ref.id, tags=["a", "b", "c"])
        result = remove_tags_from_reference(session, reference_id=ref.id, tags=["a", "b"])
        session.commit()

        assert set(result["removed"]) == {"a", "b"}
@@ -175,54 +175,54 @@ class TestRemoveTagsFromAssetInfo:

    def test_reports_not_present(self, session: Session):
        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)

        add_tags_to_asset_info(session, asset_info_id=info.id, tags=["a"])
        result = remove_tags_from_asset_info(session, asset_info_id=info.id, tags=["a", "x"])
        add_tags_to_reference(session, reference_id=ref.id, tags=["a"])
        result = remove_tags_from_reference(session, reference_id=ref.id, tags=["a", "x"])
        session.commit()

        assert result["removed"] == ["a"]
        assert result["not_present"] == ["x"]

    def test_raises_for_missing_asset_info(self, session: Session):
    def test_raises_for_missing_reference(self, session: Session):
        with pytest.raises(ValueError, match="not found"):
            remove_tags_from_asset_info(session, asset_info_id="nonexistent", tags=["x"])
            remove_tags_from_reference(session, reference_id="nonexistent", tags=["x"])


class TestMissingTagFunctions:
    def test_add_missing_tag_for_asset_id(self, session: Session):
        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)
        ensure_tags_exist(session, ["missing"], tag_type="system")

        add_missing_tag_for_asset_id(session, asset_id=asset.id)
        session.commit()

        tags = get_asset_tags(session, asset_info_id=info.id)
        tags = get_reference_tags(session, reference_id=ref.id)
        assert "missing" in tags

    def test_add_missing_tag_is_idempotent(self, session: Session):
        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)
        ensure_tags_exist(session, ["missing"], tag_type="system")

        add_missing_tag_for_asset_id(session, asset_id=asset.id)
        add_missing_tag_for_asset_id(session, asset_id=asset.id)
        session.commit()

        links = session.query(AssetInfoTag).filter_by(asset_info_id=info.id, tag_name="missing").all()
        links = session.query(AssetReferenceTag).filter_by(asset_reference_id=ref.id, tag_name="missing").all()
        assert len(links) == 1

    def test_remove_missing_tag_for_asset_id(self, session: Session):
        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)
        ensure_tags_exist(session, ["missing"], tag_type="system")
        add_missing_tag_for_asset_id(session, asset_id=asset.id)

        remove_missing_tag_for_asset_id(session, asset_id=asset.id)
        session.commit()

        tags = get_asset_tags(session, asset_info_id=info.id)
        tags = get_reference_tags(session, reference_id=ref.id)
        assert "missing" not in tags


@@ -231,8 +231,8 @@ class TestListTagsWithUsage:
        ensure_tags_exist(session, ["used", "unused"])

        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        add_tags_to_asset_info(session, asset_info_id=info.id, tags=["used"])
        ref = _make_reference(session, asset)
        add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
        session.commit()

        rows, total = list_tags_with_usage(session)
@@ -246,8 +246,8 @@ class TestListTagsWithUsage:
        ensure_tags_exist(session, ["used", "unused"])

        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        add_tags_to_asset_info(session, asset_info_id=info.id, tags=["used"])
        ref = _make_reference(session, asset)
        add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
        session.commit()

        rows, total = list_tags_with_usage(session, include_zero=False)
@@ -278,11 +278,11 @@ class TestListTagsWithUsage:
        ensure_tags_exist(session, ["shared-tag", "owner-tag"])

        asset = _make_asset(session, "hash1")
        shared_info = _make_asset_info(session, asset, name="shared", owner_id="")
        owner_info = _make_asset_info(session, asset, name="owned", owner_id="user1")
        shared_ref = _make_reference(session, asset, name="shared", owner_id="")
        owner_ref = _make_reference(session, asset, name="owned", owner_id="user1")

        add_tags_to_asset_info(session, asset_info_id=shared_info.id, tags=["shared-tag"])
        add_tags_to_asset_info(session, asset_info_id=owner_info.id, tags=["owner-tag"])
        add_tags_to_reference(session, reference_id=shared_ref.id, tags=["shared-tag"])
        add_tags_to_reference(session, reference_id=owner_ref.id, tags=["owner-tag"])
        session.commit()

        # Empty owner sees only shared
@@ -301,29 +301,29 @@ class TestListTagsWithUsage:
class TestBulkInsertTagsAndMeta:
    def test_inserts_tags(self, session: Session):
        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)
        ensure_tags_exist(session, ["bulk-tag1", "bulk-tag2"])
        session.commit()

        now = get_utc_now()
        tag_rows = [
            {"asset_info_id": info.id, "tag_name": "bulk-tag1", "origin": "manual", "added_at": now},
            {"asset_info_id": info.id, "tag_name": "bulk-tag2", "origin": "manual", "added_at": now},
            {"asset_reference_id": ref.id, "tag_name": "bulk-tag1", "origin": "manual", "added_at": now},
            {"asset_reference_id": ref.id, "tag_name": "bulk-tag2", "origin": "manual", "added_at": now},
        ]
        bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=[])
        session.commit()

        tags = get_asset_tags(session, asset_info_id=info.id)
        tags = get_reference_tags(session, reference_id=ref.id)
        assert set(tags) == {"bulk-tag1", "bulk-tag2"}

    def test_inserts_meta(self, session: Session):
        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)
        session.commit()

        meta_rows = [
            {
                "asset_info_id": info.id,
                "asset_reference_id": ref.id,
                "key": "meta-key",
                "ordinal": 0,
                "val_str": "meta-value",
@@ -335,32 +335,32 @@ class TestBulkInsertTagsAndMeta:
        bulk_insert_tags_and_meta(session, tag_rows=[], meta_rows=meta_rows)
        session.commit()

        meta = session.query(AssetInfoMeta).filter_by(asset_info_id=info.id).all()
        meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
        assert len(meta) == 1
        assert meta[0].key == "meta-key"
        assert meta[0].val_str == "meta-value"

    def test_ignores_conflicts(self, session: Session):
        asset = _make_asset(session, "hash1")
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)
        ensure_tags_exist(session, ["existing-tag"])
        add_tags_to_asset_info(session, asset_info_id=info.id, tags=["existing-tag"])
        add_tags_to_reference(session, reference_id=ref.id, tags=["existing-tag"])
        session.commit()

        now = get_utc_now()
        tag_rows = [
            {"asset_info_id": info.id, "tag_name": "existing-tag", "origin": "duplicate", "added_at": now},
            {"asset_reference_id": ref.id, "tag_name": "existing-tag", "origin": "duplicate", "added_at": now},
        ]
        bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=[])
        session.commit()

        # Should still have only one tag link
        links = session.query(AssetInfoTag).filter_by(asset_info_id=info.id, tag_name="existing-tag").all()
        links = session.query(AssetReferenceTag).filter_by(asset_reference_id=ref.id, tag_name="existing-tag").all()
        assert len(links) == 1
        # Origin should be original, not overwritten
        assert links[0].origin == "manual"

    def test_empty_lists_is_noop(self, session: Session):
        bulk_insert_tags_and_meta(session, tag_rows=[], meta_rows=[])
        assert session.query(AssetInfoTag).count() == 0
        assert session.query(AssetInfoMeta).count() == 0
        assert session.query(AssetReferenceTag).count() == 0
        assert session.query(AssetReferenceMeta).count() == 0

@@ -9,6 +9,12 @@ from sqlalchemy.orm import Session
from app.assets.database.models import Base


@pytest.fixture(autouse=True)
def autoclean_unit_test_assets():
    """Override parent autouse fixture - service unit tests don't need server cleanup."""
    yield


@pytest.fixture
def db_engine():
    """In-memory SQLite engine for fast unit tests."""

@@ -2,8 +2,8 @@
import pytest
from sqlalchemy.orm import Session

from app.assets.database.models import Asset, AssetInfo
from app.assets.database.queries import ensure_tags_exist, add_tags_to_asset_info
from app.assets.database.models import Asset, AssetReference
from app.assets.database.queries import ensure_tags_exist, add_tags_to_reference
from app.assets.helpers import get_utc_now
from app.assets.services import (
    get_asset_detail,
@@ -20,14 +20,14 @@ def _make_asset(session: Session, hash_val: str = "blake3:test", size: int = 102
    return asset


def _make_asset_info(
def _make_reference(
    session: Session,
    asset: Asset,
    name: str = "test",
    owner_id: str = "",
) -> AssetInfo:
) -> AssetReference:
    now = get_utc_now()
    info = AssetInfo(
    ref = AssetReference(
        owner_id=owner_id,
        name=name,
        asset_id=asset.id,
@@ -35,70 +35,70 @@ def _make_asset_info(
        updated_at=now,
        last_access_time=now,
    )
    session.add(info)
    session.add(ref)
    session.flush()
    return info
    return ref


class TestGetAssetDetail:
    def test_returns_none_for_nonexistent(self, mock_create_session):
        result = get_asset_detail(asset_info_id="nonexistent")
        result = get_asset_detail(reference_id="nonexistent")
        assert result is None

    def test_returns_asset_with_tags(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset, name="test.bin")
        ref = _make_reference(session, asset, name="test.bin")
        ensure_tags_exist(session, ["alpha", "beta"])
        add_tags_to_asset_info(session, asset_info_id=info.id, tags=["alpha", "beta"])
        add_tags_to_reference(session, reference_id=ref.id, tags=["alpha", "beta"])
        session.commit()

        result = get_asset_detail(asset_info_id=info.id)
        result = get_asset_detail(reference_id=ref.id)

        assert result is not None
        assert result.info.id == info.id
        assert result.ref.id == ref.id
        assert result.asset.hash == asset.hash
        assert set(result.tags) == {"alpha", "beta"}

    def test_respects_owner_visibility(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset, owner_id="user1")
        ref = _make_reference(session, asset, owner_id="user1")
        session.commit()

        # Wrong owner cannot see
        result = get_asset_detail(asset_info_id=info.id, owner_id="user2")
        result = get_asset_detail(reference_id=ref.id, owner_id="user2")
        assert result is None

        # Correct owner can see
        result = get_asset_detail(asset_info_id=info.id, owner_id="user1")
        result = get_asset_detail(reference_id=ref.id, owner_id="user1")
        assert result is not None


class TestUpdateAssetMetadata:
    def test_updates_name(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset, name="old_name.bin")
        info_id = info.id
        ref = _make_reference(session, asset, name="old_name.bin")
        ref_id = ref.id
        session.commit()

        update_asset_metadata(
            asset_info_id=info_id,
            reference_id=ref_id,
            name="new_name.bin",
        )

        # Verify by re-fetching from DB
        session.expire_all()
        updated_info = session.get(AssetInfo, info_id)
        assert updated_info.name == "new_name.bin"
        updated_ref = session.get(AssetReference, ref_id)
        assert updated_ref.name == "new_name.bin"

    def test_updates_tags(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)
        ensure_tags_exist(session, ["old"])
        add_tags_to_asset_info(session, asset_info_id=info.id, tags=["old"])
        add_tags_to_reference(session, reference_id=ref.id, tags=["old"])
        session.commit()

        result = update_asset_metadata(
            asset_info_id=info.id,
            reference_id=ref.id,
            tags=["new1", "new2"],
        )

@@ -107,84 +107,84 @@ class TestUpdateAssetMetadata:

    def test_updates_user_metadata(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset)
        info_id = info.id
        ref = _make_reference(session, asset)
        ref_id = ref.id
        session.commit()

        update_asset_metadata(
            asset_info_id=info_id,
            reference_id=ref_id,
            user_metadata={"key": "value", "num": 42},
        )

        # Verify by re-fetching from DB
        session.expire_all()
        updated_info = session.get(AssetInfo, info_id)
        assert updated_info.user_metadata["key"] == "value"
        assert updated_info.user_metadata["num"] == 42
        updated_ref = session.get(AssetReference, ref_id)
        assert updated_ref.user_metadata["key"] == "value"
        assert updated_ref.user_metadata["num"] == 42

    def test_raises_for_nonexistent(self, mock_create_session):
        with pytest.raises(ValueError, match="not found"):
            update_asset_metadata(asset_info_id="nonexistent", name="fail")
            update_asset_metadata(reference_id="nonexistent", name="fail")

    def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset, owner_id="user1")
        ref = _make_reference(session, asset, owner_id="user1")
        session.commit()

        with pytest.raises(PermissionError, match="not owner"):
            update_asset_metadata(
                asset_info_id=info.id,
                reference_id=ref.id,
                name="new",
                owner_id="user2",
            )


class TestDeleteAssetReference:
    def test_deletes_asset_info(self, mock_create_session, session: Session):
    def test_deletes_reference(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset)
        info_id = info.id
        ref = _make_reference(session, asset)
        ref_id = ref.id
        session.commit()

        result = delete_asset_reference(
            asset_info_id=info_id,
            reference_id=ref_id,
            owner_id="",
            delete_content_if_orphan=False,
        )

        assert result is True
        assert session.get(AssetInfo, info_id) is None
        assert session.get(AssetReference, ref_id) is None

    def test_returns_false_for_nonexistent(self, mock_create_session):
        result = delete_asset_reference(
            asset_info_id="nonexistent",
            reference_id="nonexistent",
            owner_id="",
        )
        assert result is False

    def test_returns_false_for_wrong_owner(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset, owner_id="user1")
        info_id = info.id
        ref = _make_reference(session, asset, owner_id="user1")
        ref_id = ref.id
        session.commit()

        result = delete_asset_reference(
            asset_info_id=info_id,
            reference_id=ref_id,
            owner_id="user2",
        )

        assert result is False
        assert session.get(AssetInfo, info_id) is not None
        assert session.get(AssetReference, ref_id) is not None

    def test_keeps_asset_if_other_infos_exist(self, mock_create_session, session: Session):
    def test_keeps_asset_if_other_references_exist(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info1 = _make_asset_info(session, asset, name="info1")
        _make_asset_info(session, asset, name="info2")  # Second info keeps asset alive
        ref1 = _make_reference(session, asset, name="ref1")
        _make_reference(session, asset, name="ref2")  # Second ref keeps asset alive
        asset_id = asset.id
        session.commit()

        delete_asset_reference(
            asset_info_id=info1.id,
            reference_id=ref1.id,
            owner_id="",
            delete_content_if_orphan=True,
        )
@@ -194,19 +194,19 @@ class TestDeleteAssetReference:

    def test_deletes_orphaned_asset(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)
        asset_id = asset.id
        info_id = info.id
        ref_id = ref.id
        session.commit()

        delete_asset_reference(
            asset_info_id=info_id,
            reference_id=ref_id,
            owner_id="",
            delete_content_if_orphan=True,
        )

        # Both info and asset should be gone
        assert session.get(AssetInfo, info_id) is None
        # Both ref and asset should be gone
        assert session.get(AssetReference, ref_id) is None
        assert session.get(Asset, asset_id) is None


@@ -214,51 +214,51 @@ class TestSetAssetPreview:
    def test_sets_preview(self, mock_create_session, session: Session):
        asset = _make_asset(session, hash_val="blake3:main")
        preview_asset = _make_asset(session, hash_val="blake3:preview")
        info = _make_asset_info(session, asset)
        info_id = info.id
        ref = _make_reference(session, asset)
        ref_id = ref.id
        preview_id = preview_asset.id
        session.commit()

        set_asset_preview(
            asset_info_id=info_id,
            reference_id=ref_id,
            preview_asset_id=preview_id,
        )

        # Verify by re-fetching from DB
        session.expire_all()
        updated_info = session.get(AssetInfo, info_id)
        assert updated_info.preview_id == preview_id
        updated_ref = session.get(AssetReference, ref_id)
        assert updated_ref.preview_id == preview_id

    def test_clears_preview(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        preview_asset = _make_asset(session, hash_val="blake3:preview")
        info = _make_asset_info(session, asset)
        info.preview_id = preview_asset.id
        info_id = info.id
        ref = _make_reference(session, asset)
        ref.preview_id = preview_asset.id
        ref_id = ref.id
        session.commit()

        set_asset_preview(
            asset_info_id=info_id,
            reference_id=ref_id,
            preview_asset_id=None,
        )

        # Verify by re-fetching from DB
        session.expire_all()
        updated_info = session.get(AssetInfo, info_id)
        assert updated_info.preview_id is None
        updated_ref = session.get(AssetReference, ref_id)
        assert updated_ref.preview_id is None

    def test_raises_for_nonexistent_info(self, mock_create_session):
    def test_raises_for_nonexistent_ref(self, mock_create_session):
        with pytest.raises(ValueError, match="not found"):
            set_asset_preview(asset_info_id="nonexistent")
            set_asset_preview(reference_id="nonexistent")

    def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset, owner_id="user1")
        ref = _make_reference(session, asset, owner_id="user1")
        session.commit()

        with pytest.raises(PermissionError, match="not owner"):
            set_asset_preview(
                asset_info_id=info.id,
                reference_id=ref.id,
                preview_asset_id=None,
                owner_id="user2",
            )

@@ -4,7 +4,7 @@ from pathlib import Path

from sqlalchemy.orm import Session

from app.assets.database.models import Asset
from app.assets.database.models import Asset, AssetReference
from app.assets.services.bulk_ingest import SeedAssetSpec, batch_insert_seed_assets


@@ -30,7 +30,7 @@ class TestBatchInsertSeedAssets:

        result = batch_insert_seed_assets(session, specs=specs, owner_id="")

        assert result.inserted_infos == 1
        assert result.inserted_refs == 1

        # Verify Asset has mime_type populated
        assets = session.query(Asset).all()
@@ -58,7 +58,7 @@ class TestBatchInsertSeedAssets:

        result = batch_insert_seed_assets(session, specs=specs, owner_id="")

        assert result.inserted_infos == 1
        assert result.inserted_refs == 1

        assets = session.query(Asset).all()
        assert len(assets) == 1
@@ -93,13 +93,12 @@ class TestBatchInsertSeedAssets:

        result = batch_insert_seed_assets(session, specs=specs, owner_id="")

        assert result.inserted_infos == len(test_cases)
        assert result.inserted_refs == len(test_cases)

        for filename, expected_mime in test_cases:
            from app.assets.database.models import AssetInfo
            info = session.query(AssetInfo).filter_by(name=filename).first()
            assert info is not None
            asset = session.query(Asset).filter_by(id=info.asset_id).first()
            ref = session.query(AssetReference).filter_by(name=filename).first()
            assert ref is not None
            asset = session.query(Asset).filter_by(id=ref.asset_id).first()
            assert asset.mime_type == expected_mime, f"Expected {expected_mime} for {filename}, got {asset.mime_type}"


253
tests-unit/assets_test/services/test_enrich.py
Normal file
@@ -0,0 +1,253 @@
"""Tests for asset enrichment (mime_type and hash population)."""
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.models import Asset, AssetReference
|
||||
from app.assets.scanner import (
|
||||
ENRICHMENT_HASHED,
|
||||
ENRICHMENT_METADATA,
|
||||
ENRICHMENT_STUB,
|
||||
enrich_asset,
|
||||
)
|
||||
|
||||
|
||||
def _create_stub_asset(
|
||||
session: Session,
|
||||
file_path: str,
|
||||
asset_id: str = "test-asset-id",
|
||||
reference_id: str = "test-ref-id",
|
||||
name: str | None = None,
|
||||
) -> tuple[Asset, AssetReference]:
|
||||
"""Create a stub asset with reference for testing enrichment."""
|
||||
asset = Asset(
|
||||
id=asset_id,
|
||||
hash=None,
|
||||
size_bytes=100,
|
||||
mime_type=None,
|
||||
)
|
||||
session.add(asset)
|
||||
session.flush()
|
||||
|
||||
ref = AssetReference(
|
||||
id=reference_id,
|
||||
asset_id=asset_id,
|
||||
name=name or f"test-asset-{asset_id}",
|
||||
owner_id="system",
|
||||
file_path=file_path,
|
||||
mtime_ns=1234567890000000000,
|
||||
enrichment_level=ENRICHMENT_STUB,
|
||||
)
|
||||
session.add(ref)
|
||||
session.flush()
|
||||
|
||||
return asset, ref
|
||||
|
||||
|
||||
class TestEnrichAsset:
|
||||
def test_extracts_mime_type_and_updates_asset(
|
||||
self, db_engine, temp_dir: Path, session: Session
|
||||
):
|
||||
"""Verify mime_type is written to the Asset table during enrichment."""
|
||||
file_path = temp_dir / "model.safetensors"
|
||||
file_path.write_bytes(b"\x00" * 100)
|
||||
|
||||
asset, ref = _create_stub_asset(
|
||||
session, str(file_path), "asset-1", "ref-1"
|
||||
)
|
||||
session.commit()
|
||||
|
||||
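        # A note on the pattern used throughout this file: enrich_asset opens
        # its own sessions via app.assets.scanner.create_session, so each test
        # redirects that factory to the test's in-memory engine. This keeps
        # the function under test and the assertions on one database.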
        with patch("app.assets.scanner.create_session") as mock_cs:
            from contextlib import contextmanager

            @contextmanager
            def _create_session():
                with Session(db_engine) as sess:
                    yield sess

            mock_cs.side_effect = _create_session

            new_level = enrich_asset(
                file_path=str(file_path),
                reference_id=ref.id,
                asset_id=asset.id,
                extract_metadata=True,
                compute_hash=False,
            )

            assert new_level == ENRICHMENT_METADATA

        session.expire_all()
        updated_asset = session.get(Asset, "asset-1")
        assert updated_asset is not None
        assert updated_asset.mime_type == "application/safetensors"

    def test_computes_hash_and_updates_asset(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify hash is written to the Asset table during enrichment."""
        file_path = temp_dir / "data.bin"
        file_path.write_bytes(b"test content for hashing")

        asset, ref = _create_stub_asset(
            session, str(file_path), "asset-2", "ref-2"
        )
        session.commit()

        with patch("app.assets.scanner.create_session") as mock_cs:
            from contextlib import contextmanager

            @contextmanager
            def _create_session():
                with Session(db_engine) as sess:
                    yield sess

            mock_cs.side_effect = _create_session

            new_level = enrich_asset(
                file_path=str(file_path),
                reference_id=ref.id,
                asset_id=asset.id,
                extract_metadata=True,
                compute_hash=True,
            )

            assert new_level == ENRICHMENT_HASHED

        session.expire_all()
        updated_asset = session.get(Asset, "asset-2")
        assert updated_asset is not None
        assert updated_asset.hash is not None
        assert updated_asset.hash.startswith("blake3:")

    def test_enrichment_updates_both_mime_and_hash(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify both mime_type and hash are set when full enrichment runs."""
        file_path = temp_dir / "model.safetensors"
        file_path.write_bytes(b"\x00" * 50)

        asset, ref = _create_stub_asset(
            session, str(file_path), "asset-3", "ref-3"
        )
        session.commit()

        with patch("app.assets.scanner.create_session") as mock_cs:
            from contextlib import contextmanager

            @contextmanager
            def _create_session():
                with Session(db_engine) as sess:
                    yield sess

            mock_cs.side_effect = _create_session

            enrich_asset(
                file_path=str(file_path),
                reference_id=ref.id,
                asset_id=asset.id,
                extract_metadata=True,
                compute_hash=True,
            )

        session.expire_all()
        updated_asset = session.get(Asset, "asset-3")
        assert updated_asset is not None
        assert updated_asset.mime_type == "application/safetensors"
        assert updated_asset.hash is not None
        assert updated_asset.hash.startswith("blake3:")

    def test_missing_file_returns_stub_level(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify missing files don't cause errors and return STUB level."""
        file_path = temp_dir / "nonexistent.bin"

        asset, ref = _create_stub_asset(
            session, str(file_path), "asset-4", "ref-4"
        )
        session.commit()

        with patch("app.assets.scanner.create_session") as mock_cs:
            from contextlib import contextmanager

            @contextmanager
            def _create_session():
                with Session(db_engine) as sess:
                    yield sess

            mock_cs.side_effect = _create_session

            new_level = enrich_asset(
                file_path=str(file_path),
                reference_id=ref.id,
                asset_id=asset.id,
                extract_metadata=True,
                compute_hash=True,
            )

            assert new_level == ENRICHMENT_STUB

        session.expire_all()
        updated_asset = session.get(Asset, "asset-4")
        assert updated_asset.mime_type is None
        assert updated_asset.hash is None

    def test_duplicate_hash_merges_into_existing_asset(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify duplicate files merge into existing asset instead of failing."""
        file_path_1 = temp_dir / "file1.bin"
        file_path_2 = temp_dir / "file2.bin"
        content = b"identical content"
        file_path_1.write_bytes(content)
        file_path_2.write_bytes(content)

        asset1, ref1 = _create_stub_asset(
            session, str(file_path_1), "asset-dup-1", "ref-dup-1"
        )
        asset2, ref2 = _create_stub_asset(
            session, str(file_path_2), "asset-dup-2", "ref-dup-2"
        )
        session.commit()

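        # Identical bytes produce identical blake3 digests, so the second
        # enrichment should detect the collision on the hashed asset and
        # merge the duplicate: ref-dup-2 is repointed at asset-dup-1 and
        # asset-dup-2 is removed, as the assertions below verify.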
        with patch("app.assets.scanner.create_session") as mock_cs:
            from contextlib import contextmanager

            @contextmanager
            def _create_session():
                with Session(db_engine) as sess:
                    yield sess

            mock_cs.side_effect = _create_session

            enrich_asset(
                file_path=str(file_path_1),
                reference_id=ref1.id,
                asset_id=asset1.id,
                extract_metadata=True,
                compute_hash=True,
            )

            enrich_asset(
                file_path=str(file_path_2),
                reference_id=ref2.id,
                asset_id=asset2.id,
                extract_metadata=True,
                compute_hash=True,
            )

        session.expire_all()

        updated_asset1 = session.get(Asset, "asset-dup-1")
        assert updated_asset1 is not None
        assert updated_asset1.hash is not None

        updated_asset2 = session.get(Asset, "asset-dup-2")
        assert updated_asset2 is None

        updated_ref2 = session.get(AssetReference, "ref-dup-2")
        assert updated_ref2 is not None
        assert updated_ref2.asset_id == "asset-dup-1"
@@ -4,13 +4,13 @@ from pathlib import Path
import pytest
from sqlalchemy.orm import Session

from app.assets.database.models import Asset, AssetCacheState, AssetInfo, Tag
from app.assets.database.queries import get_asset_tags
from app.assets.database.models import Asset, AssetReference, Tag
from app.assets.database.queries import get_reference_tags
from app.assets.services.ingest import _ingest_file_from_path, _register_existing_asset


class TestIngestFileFromPath:
    def test_creates_asset_and_cache_state(self, mock_create_session, temp_dir: Path, session: Session):
    def test_creates_asset_and_reference(self, mock_create_session, temp_dir: Path, session: Session):
        file_path = temp_dir / "test_file.bin"
        file_path.write_bytes(b"test content")

@@ -23,19 +23,19 @@ class TestIngestFileFromPath:
        )

        assert result.asset_created is True
        assert result.state_created is True
        assert result.asset_info_id is None  # no info_name provided
        assert result.ref_created is True
        assert result.reference_id is not None

        # Verify DB state
        assets = session.query(Asset).all()
        assert len(assets) == 1
        assert assets[0].hash == "blake3:abc123"

        states = session.query(AssetCacheState).all()
        assert len(states) == 1
        assert states[0].file_path == str(file_path)
        refs = session.query(AssetReference).all()
        assert len(refs) == 1
        assert refs[0].file_path == str(file_path)

    def test_creates_asset_info_when_name_provided(self, mock_create_session, temp_dir: Path, session: Session):
    def test_creates_reference_when_name_provided(self, mock_create_session, temp_dir: Path, session: Session):
        file_path = temp_dir / "model.safetensors"
        file_path.write_bytes(b"model data")

@@ -50,12 +50,12 @@ class TestIngestFileFromPath:
        )

        assert result.asset_created is True
        assert result.asset_info_id is not None
        assert result.reference_id is not None

        info = session.query(AssetInfo).first()
        assert info is not None
        assert info.name == "My Model"
        assert info.owner_id == "user1"
        ref = session.query(AssetReference).first()
        assert ref is not None
        assert ref.name == "My Model"
        assert ref.owner_id == "user1"

    def test_creates_tags_when_provided(self, mock_create_session, temp_dir: Path, session: Session):
        file_path = temp_dir / "tagged.bin"
@@ -70,7 +70,7 @@ class TestIngestFileFromPath:
            tags=["models", "checkpoints"],
        )

        assert result.asset_info_id is not None
        assert result.reference_id is not None

        # Verify tags were created and linked
        tags = session.query(Tag).all()
@@ -78,8 +78,8 @@ class TestIngestFileFromPath:
        assert "models" in tag_names
        assert "checkpoints" in tag_names

        asset_tags = get_asset_tags(session, asset_info_id=result.asset_info_id)
        assert set(asset_tags) == {"models", "checkpoints"}
        ref_tags = get_reference_tags(session, reference_id=result.reference_id)
        assert set(ref_tags) == {"models", "checkpoints"}

    def test_idempotent_upsert(self, mock_create_session, temp_dir: Path, session: Session):
        file_path = temp_dir / "dup.bin"
@@ -102,7 +102,7 @@ class TestIngestFileFromPath:
            mtime_ns=1234567890000000001,  # different mtime
        )
        assert r2.asset_created is False
        assert r2.state_updated is True or r2.state_created is False
        assert r2.ref_updated is True or r2.ref_created is False

        # Still only one asset
        assets = session.query(Asset).all()
@@ -127,9 +127,9 @@ class TestIngestFileFromPath:
            preview_id=preview_id,
        )

        assert result.asset_info_id is not None
        info = session.query(AssetInfo).filter_by(id=result.asset_info_id).first()
        assert info.preview_id == preview_id
        assert result.reference_id is not None
        ref = session.query(AssetReference).filter_by(id=result.reference_id).first()
        assert ref.preview_id == preview_id

    def test_invalid_preview_id_is_cleared(self, mock_create_session, temp_dir: Path, session: Session):
        file_path = temp_dir / "bad_preview.bin"
@@ -144,13 +144,13 @@ class TestIngestFileFromPath:
            preview_id="nonexistent-uuid",
        )

        assert result.asset_info_id is not None
        info = session.query(AssetInfo).filter_by(id=result.asset_info_id).first()
        assert info.preview_id is None
        assert result.reference_id is not None
        ref = session.query(AssetReference).filter_by(id=result.reference_id).first()
        assert ref.preview_id is None


class TestRegisterExistingAsset:
    def test_creates_info_for_existing_asset(self, mock_create_session, session: Session):
    def test_creates_reference_for_existing_asset(self, mock_create_session, session: Session):
        # Create existing asset
        asset = Asset(hash="blake3:existing", size_bytes=1024, mime_type="image/png")
        session.add(asset)
@@ -168,42 +168,43 @@ class TestRegisterExistingAsset:

        # Verify by re-fetching from DB
        session.expire_all()
        infos = session.query(AssetInfo).filter_by(name="Registered Asset").all()
        assert len(infos) == 1
        refs = session.query(AssetReference).filter_by(name="Registered Asset").all()
        assert len(refs) == 1

    def test_returns_existing_info(self, mock_create_session, session: Session):
        # Create asset and info
        asset = Asset(hash="blake3:withinfo", size_bytes=512)
    def test_creates_new_reference_even_with_same_name(self, mock_create_session, session: Session):
        # Create asset and reference
        asset = Asset(hash="blake3:withref", size_bytes=512)
        session.add(asset)
        session.flush()

        from app.assets.helpers import get_utc_now
        info = AssetInfo(
        ref = AssetReference(
            owner_id="",
            name="Existing Info",
            name="Existing Ref",
            asset_id=asset.id,
            created_at=get_utc_now(),
            updated_at=get_utc_now(),
            last_access_time=get_utc_now(),
        )
        session.add(info)
        session.flush()  # Flush to get the ID
        info_id = info.id
        session.add(ref)
        session.flush()
        ref_id = ref.id
        session.commit()

        result = _register_existing_asset(
            asset_hash="blake3:withinfo",
            name="Existing Info",
            asset_hash="blake3:withref",
            name="Existing Ref",
            owner_id="",
        )

        assert result.created is False
        # Multiple files with same name are allowed
        assert result.created is True

        # Verify only one AssetInfo exists for this name
        # Verify two AssetReferences exist for this name
        session.expire_all()
        infos = session.query(AssetInfo).filter_by(name="Existing Info").all()
        assert len(infos) == 1
        assert infos[0].id == info_id
        refs = session.query(AssetReference).filter_by(name="Existing Ref").all()
        assert len(refs) == 2
        assert ref_id in [r.id for r in refs]

    def test_raises_for_nonexistent_hash(self, mock_create_session):
        with pytest.raises(ValueError, match="No asset with hash"):
@@ -212,14 +213,14 @@ class TestRegisterExistingAsset:
            name="Fail",
        )

    def test_applies_tags_to_new_info(self, mock_create_session, session: Session):
    def test_applies_tags_to_new_reference(self, mock_create_session, session: Session):
        asset = Asset(hash="blake3:tagged", size_bytes=256)
        session.add(asset)
        session.commit()

        result = _register_existing_asset(
            asset_hash="blake3:tagged",
            name="Tagged Info",
            name="Tagged Ref",
            tags=["alpha", "beta"],
        )


@@ -2,8 +2,8 @@
import pytest
from sqlalchemy.orm import Session

from app.assets.database.models import Asset, AssetInfo
from app.assets.database.queries import ensure_tags_exist, add_tags_to_asset_info
from app.assets.database.models import Asset, AssetReference
from app.assets.database.queries import ensure_tags_exist, add_tags_to_reference
from app.assets.helpers import get_utc_now
from app.assets.services import apply_tags, remove_tags, list_tags

@@ -15,14 +15,14 @@ def _make_asset(session: Session, hash_val: str = "blake3:test") -> Asset:
    return asset


def _make_asset_info(
def _make_reference(
    session: Session,
    asset: Asset,
    name: str = "test",
    owner_id: str = "",
) -> AssetInfo:
) -> AssetReference:
    now = get_utc_now()
    info = AssetInfo(
    ref = AssetReference(
        owner_id=owner_id,
        name=name,
        asset_id=asset.id,
@@ -30,19 +30,19 @@ def _make_asset_info(
        updated_at=now,
        last_access_time=now,
    )
    session.add(info)
    session.add(ref)
    session.flush()
    return info
    return ref


class TestApplyTags:
    def test_adds_new_tags(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)
        session.commit()

        result = apply_tags(
            asset_info_id=info.id,
            reference_id=ref.id,
            tags=["alpha", "beta"],
        )

@@ -52,31 +52,31 @@ class TestApplyTags:

    def test_reports_already_present(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)
        ensure_tags_exist(session, ["existing"])
        add_tags_to_asset_info(session, asset_info_id=info.id, tags=["existing"])
        add_tags_to_reference(session, reference_id=ref.id, tags=["existing"])
        session.commit()

        result = apply_tags(
            asset_info_id=info.id,
            reference_id=ref.id,
            tags=["existing", "new"],
        )

        assert result.added == ["new"]
        assert result.already_present == ["existing"]

    def test_raises_for_nonexistent_info(self, mock_create_session):
    def test_raises_for_nonexistent_ref(self, mock_create_session):
        with pytest.raises(ValueError, match="not found"):
            apply_tags(asset_info_id="nonexistent", tags=["x"])
            apply_tags(reference_id="nonexistent", tags=["x"])

    def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset, owner_id="user1")
        ref = _make_reference(session, asset, owner_id="user1")
        session.commit()

        with pytest.raises(PermissionError, match="not owner"):
            apply_tags(
                asset_info_id=info.id,
                reference_id=ref.id,
                tags=["new"],
                owner_id="user2",
            )
@@ -85,13 +85,13 @@ class TestApplyTags:
class TestRemoveTags:
    def test_removes_tags(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)
        ensure_tags_exist(session, ["a", "b", "c"])
        add_tags_to_asset_info(session, asset_info_id=info.id, tags=["a", "b", "c"])
        add_tags_to_reference(session, reference_id=ref.id, tags=["a", "b", "c"])
        session.commit()

        result = remove_tags(
            asset_info_id=info.id,
            reference_id=ref.id,
            tags=["a", "b"],
        )

@@ -101,31 +101,31 @@ class TestRemoveTags:

    def test_reports_not_present(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset)
        ref = _make_reference(session, asset)
        ensure_tags_exist(session, ["present"])
        add_tags_to_asset_info(session, asset_info_id=info.id, tags=["present"])
        add_tags_to_reference(session, reference_id=ref.id, tags=["present"])
        session.commit()

        result = remove_tags(
            asset_info_id=info.id,
            reference_id=ref.id,
            tags=["present", "absent"],
        )

        assert result.removed == ["present"]
        assert result.not_present == ["absent"]

    def test_raises_for_nonexistent_info(self, mock_create_session):
    def test_raises_for_nonexistent_ref(self, mock_create_session):
        with pytest.raises(ValueError, match="not found"):
            remove_tags(asset_info_id="nonexistent", tags=["x"])
            remove_tags(reference_id="nonexistent", tags=["x"])

    def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
        asset = _make_asset(session)
        info = _make_asset_info(session, asset, owner_id="user1")
        ref = _make_reference(session, asset, owner_id="user1")
        session.commit()

        with pytest.raises(PermissionError, match="not owner"):
            remove_tags(
                asset_info_id=info.id,
                reference_id=ref.id,
                tags=["x"],
                owner_id="user2",
            )
@@ -135,8 +135,8 @@ class TestListTags:
    def test_returns_tags_with_counts(self, mock_create_session, session: Session):
        ensure_tags_exist(session, ["used", "unused"])
        asset = _make_asset(session)
        info = _make_asset_info(session, asset)
        add_tags_to_asset_info(session, asset_info_id=info.id, tags=["used"])
        ref = _make_reference(session, asset)
        add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
        session.commit()

        rows, total = list_tags()
@@ -149,8 +149,8 @@ class TestListTags:
    def test_excludes_zero_counts(self, mock_create_session, session: Session):
        ensure_tags_exist(session, ["used", "unused"])
        asset = _make_asset(session)
        info = _make_asset_info(session, asset)
        add_tags_to_asset_info(session, asset_info_id=info.id, tags=["used"])
        ref = _make_reference(session, asset)
        add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
        session.commit()

        rows, total = list_tags(include_zero=False)

@@ -24,11 +24,11 @@ def test_create_from_hash_success(
    assert b1["created_new"] is False
    aid = b1["id"]

    # Calling again with the same name should return the same AssetInfo id
    # Calling again with the same name creates a new AssetReference (duplicates allowed)
    r2 = http.post(f"{api_base}/api/assets/from-hash", json=payload, timeout=120)
    b2 = r2.json()
    assert r2.status_code == 201, b2
    assert b2["id"] == aid
    assert b2["id"] != aid  # new reference, not the same one


def test_get_and_delete_asset(http: requests.Session, api_base: str, seeded_asset: dict):

@@ -18,25 +18,24 @@ def test_upload_ok_duplicate_reference(http: requests.Session, api_base: str, ma
    assert r1.status_code == 201, a1
    assert a1["created_new"] is True

    # Second upload with the same data and name should return created_new == False and the same asset
    # Second upload with the same data and name creates a new AssetReference (duplicates allowed)
    # Returns 200 because the Asset already exists, but a new AssetReference is created
    files = {"file": (name, data, "application/octet-stream")}
    form = {"tags": json.dumps(tags), "name": name, "user_metadata": json.dumps(meta)}
    r2 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
    a2 = r2.json()
    assert r2.status_code == 200, a2
    assert a2["created_new"] is False
    assert r2.status_code in (200, 201), a2
    assert a2["asset_hash"] == a1["asset_hash"]
    assert a2["id"] == a1["id"]  # old reference
    assert a2["id"] != a1["id"]  # new reference with same content

    # Third upload with the same data but new name should return created_new == False and the new AssetReference
    # Third upload with the same data but a different name also creates a new AssetReference
    files = {"file": (name, data, "application/octet-stream")}
    form = {"tags": json.dumps(tags), "name": name + "_d", "user_metadata": json.dumps(meta)}
    r2 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
    a3 = r2.json()
    assert r2.status_code == 200, a3
    assert a3["created_new"] is False
    r3 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
    a3 = r3.json()
    assert r3.status_code in (200, 201), a3
    assert a3["asset_hash"] == a1["asset_hash"]
    assert a3["id"] != a1["id"]  # old reference
    assert a3["id"] != a1["id"]


def test_upload_fastpath_from_existing_hash_no_file(http: requests.Session, api_base: str):
