diff --git a/app/assets/services/hashing.py b/app/assets/services/hashing.py index 58c798baa..38aeae4d7 100644 --- a/app/assets/services/hashing.py +++ b/app/assets/services/hashing.py @@ -18,7 +18,7 @@ def compute_blake3_hash( return _hash_file_obj(f, chunk_size) -async def compute_compute_blake3_hash_async( +async def compute_blake3_hash_async( fp: str | IO[bytes], chunk_size: int = DEFAULT_CHUNK, ) -> str: diff --git a/app/assets/services/metadata_extract.py b/app/assets/services/metadata_extract.py index b825479db..d61eae550 100644 --- a/app/assets/services/metadata_extract.py +++ b/app/assets/services/metadata_extract.py @@ -17,6 +17,9 @@ from typing import Any # Supported safetensors extensions SAFETENSORS_EXTENSIONS = frozenset({".safetensors", ".sft"}) +# Maximum safetensors header size to read (8MB) +MAX_SAFETENSORS_HEADER_SIZE = 8 * 1024 * 1024 + @dataclass class ExtractedMetadata: @@ -163,7 +166,7 @@ class ExtractedMetadata: return rows -def _read_safetensors_header(path: str, max_size: int = 8 * 1024 * 1024) -> dict[str, Any] | None: +def _read_safetensors_header(path: str, max_size: int = MAX_SAFETENSORS_HEADER_SIZE) -> dict[str, Any] | None: """Read only the JSON header from a safetensors file. This is very fast - reads 8 bytes for header length, then the JSON header.