Tree: Remove fasttensors

Now a no-op in upstream.

Signed-off-by: kingbri <bdashore3@proton.me>
2026-05-11 08:20:08 +00:00 · 2024-09-30 00:18:32 -04:00
parent 6726014d35
commit 126a44483c
4 changed files with 0 additions and 14 deletions
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -251,9 +251,6 @@ class ExllamaV2Container:
        else:
            self.config.scale_alpha_value = rope_alpha
        # Enable fasttensors loading if present
        self.config.fasttensors = unwrap(kwargs.get("fasttensors"), False)
        # Set max batch size to the config override
        self.max_batch_size = unwrap(kwargs.get("max_batch_size"))
--- a/common/config_models.py
+++ b/common/config_models.py
@@ -290,13 +290,6 @@ class ModelConfig(BaseConfigModel):
        ),
        ge=1,
    )
    fasttensors: Optional[bool] = Field(
        False,
        description=(
            "Enables fasttensors to possibly increase model loading speeds "
            "(default: False)."
        ),
    )
    _metadata: Metadata = PrivateAttr(Metadata())
    model_config = ConfigDict(protected_namespaces=())
--- a/config_sample.yml
+++ b/config_sample.yml
@@ -135,9 +135,6 @@ model:
  # WARNING: Don't set this unless you know what you're doing!
  num_experts_per_token:
  # Enables fasttensors to possibly increase model loading speeds (default: False).
  fasttensors: false
 # Options for draft models (speculative decoding)
 # This will use more VRAM!
 draft_model:
--- a/endpoints/core/types/model.py
+++ b/endpoints/core/types/model.py
@@ -106,7 +106,6 @@ class ModelLoadRequest(BaseModel):
    chunk_size: Optional[int] = None
    prompt_template: Optional[str] = None
    num_experts_per_token: Optional[int] = None
    fasttensors: Optional[bool] = None
    # Non-config arguments
    draft: Optional[DraftModelLoadRequest] = None