From b3e04d88bb6bd26c8226b7af5dd84d2221e57692 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 15:31:35 -0700 Subject: [PATCH] Merge system+user metadata into filter projection Extract rebuild_metadata_projection() to build AssetReferenceMeta rows from {**system_metadata, **user_metadata}, so system-generated metadata is queryable via metadata_filter and user keys override system keys. Co-Authored-By: Claude Opus 4.6 --- app/assets/database/queries/__init__.py | 4 + .../database/queries/asset_reference.py | 77 +++++++++++++------ .../assets_test/queries/test_metadata.py | 51 +++++++++++- 3 files changed, 105 insertions(+), 27 deletions(-) diff --git a/app/assets/database/queries/__init__.py b/app/assets/database/queries/__init__.py index 17ea8f27a..3f9dd6b2b 100644 --- a/app/assets/database/queries/__init__.py +++ b/app/assets/database/queries/__init__.py @@ -35,11 +35,13 @@ from app.assets.database.queries.asset_reference import ( list_references_by_asset_id, list_references_page, mark_references_missing_outside_prefixes, + rebuild_metadata_projection, reference_exists, reference_exists_for_asset_id, restore_references_by_paths, set_reference_metadata, set_reference_preview, + set_reference_system_metadata, soft_delete_reference_by_id, update_reference_access_time, update_reference_name, @@ -107,6 +109,7 @@ __all__ = [ "list_tags_with_usage", "mark_references_missing_outside_prefixes", "reassign_asset_references", + "rebuild_metadata_projection", "reference_exists", "reference_exists_for_asset_id", "remove_missing_tag_for_asset_id", @@ -114,6 +117,7 @@ __all__ = [ "restore_references_by_paths", "set_reference_metadata", "set_reference_preview", + "set_reference_system_metadata", "soft_delete_reference_by_id", "set_reference_tags", "update_asset_hash_and_mime", diff --git a/app/assets/database/queries/asset_reference.py b/app/assets/database/queries/asset_reference.py index 24e7743a2..cab824edf 100644 --- a/app/assets/database/queries/asset_reference.py +++ b/app/assets/database/queries/asset_reference.py @@ -432,6 +432,42 @@ def update_reference_updated_at( ) +def rebuild_metadata_projection(session: Session, ref: AssetReference) -> None: + """Delete and rebuild AssetReferenceMeta rows from merged system+user metadata. + + The merged dict is ``{**system_metadata, **user_metadata}`` so user keys + override system keys of the same name. + """ + session.execute( + delete(AssetReferenceMeta).where( + AssetReferenceMeta.asset_reference_id == ref.id + ) + ) + session.flush() + + merged = {**(ref.system_metadata or {}), **(ref.user_metadata or {})} + if not merged: + return + + rows: list[AssetReferenceMeta] = [] + for k, v in merged.items(): + for r in convert_metadata_to_rows(k, v): + rows.append( + AssetReferenceMeta( + asset_reference_id=ref.id, + key=r["key"], + ordinal=int(r["ordinal"]), + val_str=r.get("val_str"), + val_num=r.get("val_num"), + val_bool=r.get("val_bool"), + val_json=r.get("val_json"), + ) + ) + if rows: + session.add_all(rows) + session.flush() + + def set_reference_metadata( session: Session, reference_id: str, @@ -445,33 +481,24 @@ def set_reference_metadata( ref.updated_at = get_utc_now() session.flush() - session.execute( - delete(AssetReferenceMeta).where( - AssetReferenceMeta.asset_reference_id == reference_id - ) - ) + rebuild_metadata_projection(session, ref) + + +def set_reference_system_metadata( + session: Session, + reference_id: str, + system_metadata: dict | None = None, +) -> None: + """Set system_metadata on a reference and rebuild the merged projection.""" + ref = session.get(AssetReference, reference_id) + if not ref: + raise ValueError(f"AssetReference {reference_id} not found") + + ref.system_metadata = system_metadata or {} + ref.updated_at = get_utc_now() session.flush() - if not user_metadata: - return - - rows: list[AssetReferenceMeta] = [] - for k, v in user_metadata.items(): - for r in convert_metadata_to_rows(k, v): - rows.append( - AssetReferenceMeta( - asset_reference_id=reference_id, - key=r["key"], - ordinal=int(r["ordinal"]), - val_str=r.get("val_str"), - val_num=r.get("val_num"), - val_bool=r.get("val_bool"), - val_json=r.get("val_json"), - ) - ) - if rows: - session.add_all(rows) - session.flush() + rebuild_metadata_projection(session, ref) def delete_reference_by_id( diff --git a/tests-unit/assets_test/queries/test_metadata.py b/tests-unit/assets_test/queries/test_metadata.py index 6a545e819..d7a747789 100644 --- a/tests-unit/assets_test/queries/test_metadata.py +++ b/tests-unit/assets_test/queries/test_metadata.py @@ -20,6 +20,7 @@ def _make_reference( asset: Asset, name: str, metadata: dict | None = None, + system_metadata: dict | None = None, ) -> AssetReference: now = get_utc_now() ref = AssetReference( @@ -27,6 +28,7 @@ def _make_reference( name=name, asset_id=asset.id, user_metadata=metadata, + system_metadata=system_metadata, created_at=now, updated_at=now, last_access_time=now, @@ -34,8 +36,10 @@ def _make_reference( session.add(ref) session.flush() - if metadata: - for key, val in metadata.items(): + # Build merged projection: {**system_metadata, **user_metadata} + merged = {**(system_metadata or {}), **(metadata or {})} + if merged: + for key, val in merged.items(): for row in convert_metadata_to_rows(key, val): meta_row = AssetReferenceMeta( asset_reference_id=ref.id, @@ -182,3 +186,46 @@ class TestMetadataFilterEmptyDict: refs, _, total = list_references_page(session, metadata_filter={}) assert total == 2 + + +class TestSystemMetadataProjection: + """Tests for system_metadata merging into the filter projection.""" + + def test_system_metadata_keys_are_filterable(self, session: Session): + """system_metadata keys should appear in the merged projection.""" + asset = _make_asset(session, "hash1") + _make_reference( + session, asset, "with_sys", + system_metadata={"source": "scanner"}, + ) + _make_reference(session, asset, "without_sys") + session.commit() + + refs, _, total = list_references_page( + session, metadata_filter={"source": "scanner"} + ) + assert total == 1 + assert refs[0].name == "with_sys" + + def test_user_metadata_overrides_system_metadata(self, session: Session): + """user_metadata should win when both have the same key.""" + asset = _make_asset(session, "hash1") + _make_reference( + session, asset, "overridden", + metadata={"origin": "user_upload"}, + system_metadata={"origin": "auto_scan"}, + ) + session.commit() + + # Should match the user value, not the system value + refs, _, total = list_references_page( + session, metadata_filter={"origin": "user_upload"} + ) + assert total == 1 + assert refs[0].name == "overridden" + + # Should NOT match the system value (it was overridden) + refs, _, total = list_references_page( + session, metadata_filter={"origin": "auto_scan"} + ) + assert total == 0