Populate mime_type for assets in scanner and API paths

- Add custom MIME type registrations for model and config files (.safetensors, .sft, .pt, .pth, .ckpt, .pkl, .gguf, .yaml, .yml)
- Pass mime_type through SeedAssetSpec to bulk_ingest
- Re-register types before use since server.py mimetypes.init() resets them
- Add tests for bulk ingest mime_type handling

Amp-Thread-ID: https://ampcode.com/threads/T-019c3626-c6ad-7139-a570-62da4e656a1a
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Luke Mino-Altherr
2026-02-07 14:00:06 -08:00
parent 53869fb0c7
commit 105e54e420
4 changed files with 178 additions and 2 deletions

View File

@@ -320,6 +320,9 @@ def build_asset_specs(
except Exception as e:
logging.warning("Failed to hash %s: %s", abs_p, e)
mime_type = metadata.content_type if metadata else None
if mime_type is None:
print(f"[build_asset_specs] no mime_type for {abs_p} (metadata={metadata is not None})")
specs.append(
{
"abs_path": abs_p,
@@ -330,6 +333,7 @@ def build_asset_specs(
"fname": rel_fname,
"metadata": metadata,
"hash": asset_hash,
"mime_type": mime_type,
}
)
tag_pool.update(tags)

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import logging
import os
import uuid
from dataclasses import dataclass
@@ -38,6 +39,7 @@ class SeedAssetSpec(TypedDict):
fname: str
metadata: ExtractedMetadata | None
hash: str | None
mime_type: str | None
class AssetRow(TypedDict):
@@ -162,12 +164,15 @@ def batch_insert_seed_assets(
absolute_path_list.append(absolute_path)
path_to_asset_id[absolute_path] = asset_id
mime_type = spec.get("mime_type")
if mime_type is None:
logging.info("batch_insert_seed_assets: no mime_type for %s", absolute_path)
asset_rows.append(
{
"id": asset_id,
"hash": spec.get("hash"),
"size_bytes": spec["size_bytes"],
"mime_type": None,
"mime_type": mime_type,
"created_at": current_time,
}
)

View File

@@ -20,6 +20,31 @@ SAFETENSORS_EXTENSIONS = frozenset({".safetensors", ".sft"})
# Maximum safetensors header size to read (8MB)
MAX_SAFETENSORS_HEADER_SIZE = 8 * 1024 * 1024
# Extension -> MIME type for the model and config formats this module registers.
_CUSTOM_MIME_TYPES = {
    ".safetensors": "application/safetensors",
    ".sft": "application/safetensors",
    ".pt": "application/pytorch",
    ".pth": "application/pytorch",
    ".ckpt": "application/pickle",
    ".pkl": "application/pickle",
    ".gguf": "application/gguf",
    ".yaml": "application/yaml",
    ".yml": "application/yaml",
}


def _register_custom_mime_types():
    """Register custom MIME types for model and config files.

    Called before each use because mimetypes.init() in server.py resets the
    database. Each extension is checked on its own, so a database that
    already maps some of the extensions (but not all of them, or maps them
    to a different type) still ends up with every custom mapping in place.
    Extensions that already resolve to the expected type are left untouched,
    which keeps repeated calls free of redundant add_type calls.
    """
    for ext, mime_type in _CUSTOM_MIME_TYPES.items():
        # Probe with a dummy file name; only the extension matters here.
        current, _ = mimetypes.guess_type(f"test{ext}")
        if current != mime_type:
            mimetypes.add_type(mime_type, ext)


# Register custom types at module load
_register_custom_mime_types()
@dataclass
class ExtractedMetadata:
@@ -284,9 +309,12 @@ def extract_file_metadata(
_, ext = os.path.splitext(abs_path)
meta.format = ext.lstrip(".").lower() if ext else ""
# MIME type guess
# MIME type guess (re-register in case mimetypes.init() was called elsewhere)
_register_custom_mime_types()
mime_type, _ = mimetypes.guess_type(abs_path)
meta.content_type = mime_type
if mime_type is None:
print(f"[extract_file_metadata] No mime_type for {abs_path}")
# Size from stat
if stat_result is None: