Replace magic number with a named constant and fix function name typo

- Add MAX_SAFETENSORS_HEADER_SIZE constant in metadata_extract.py
- Fix double 'compute' typo: compute_compute_blake3_hash_async → compute_blake3_hash_async

Amp-Thread-ID: https://ampcode.com/threads/T-019c3550-4dbc-7301-a5e8-e6e23aa2d7b1
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Luke Mino-Altherr
2026-02-06 15:37:18 -08:00
parent 3a5b71a929
commit 882ae8df12
2 changed files with 5 additions and 2 deletions

View File

@@ -18,7 +18,7 @@ def compute_blake3_hash(
return _hash_file_obj(f, chunk_size)
async def compute_compute_blake3_hash_async(
async def compute_blake3_hash_async(
fp: str | IO[bytes],
chunk_size: int = DEFAULT_CHUNK,
) -> str:

View File

@@ -17,6 +17,9 @@ from typing import Any
# Supported safetensors extensions
SAFETENSORS_EXTENSIONS = frozenset({".safetensors", ".sft"})
# Maximum safetensors header size to read (8 MiB)
MAX_SAFETENSORS_HEADER_SIZE = 8 * 1024 * 1024
@dataclass
class ExtractedMetadata:
@@ -163,7 +166,7 @@ class ExtractedMetadata:
return rows
def _read_safetensors_header(path: str, max_size: int = 8 * 1024 * 1024) -> dict[str, Any] | None:
def _read_safetensors_header(path: str, max_size: int = MAX_SAFETENSORS_HEADER_SIZE) -> dict[str, Any] | None:
"""Read only the JSON header from a safetensors file.
This is very fast - reads 8 bytes for header length, then the JSON header.