Mirror of https://github.com/theroyallab/tabbyAPI.git
(synced 2026-03-15 00:07:28 +00:00)
Tree: Remove fasttensors

The fasttensors option is now a no-op in upstream.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
@@ -251,9 +251,6 @@ class ExllamaV2Container:
|
|||||||
else:
|
else:
|
||||||
self.config.scale_alpha_value = rope_alpha
|
self.config.scale_alpha_value = rope_alpha
|
||||||
|
|
||||||
# Enable fasttensors loading if present
|
|
||||||
self.config.fasttensors = unwrap(kwargs.get("fasttensors"), False)
|
|
||||||
|
|
||||||
# Set max batch size to the config override
|
# Set max batch size to the config override
|
||||||
self.max_batch_size = unwrap(kwargs.get("max_batch_size"))
|
self.max_batch_size = unwrap(kwargs.get("max_batch_size"))
|
||||||
|
|
||||||
|
|||||||
@@ -290,13 +290,6 @@ class ModelConfig(BaseConfigModel):
|
|||||||
),
|
),
|
||||||
ge=1,
|
ge=1,
|
||||||
)
|
)
|
||||||
fasttensors: Optional[bool] = Field(
|
|
||||||
False,
|
|
||||||
description=(
|
|
||||||
"Enables fasttensors to possibly increase model loading speeds "
|
|
||||||
"(default: False)."
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
_metadata: Metadata = PrivateAttr(Metadata())
|
_metadata: Metadata = PrivateAttr(Metadata())
|
||||||
model_config = ConfigDict(protected_namespaces=())
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
|
|||||||
@@ -135,9 +135,6 @@ model:
|
|||||||
# WARNING: Don't set this unless you know what you're doing!
|
# WARNING: Don't set this unless you know what you're doing!
|
||||||
num_experts_per_token:
|
num_experts_per_token:
|
||||||
|
|
||||||
# Enables fasttensors to possibly increase model loading speeds (default: False).
|
|
||||||
fasttensors: false
|
|
||||||
|
|
||||||
# Options for draft models (speculative decoding)
|
# Options for draft models (speculative decoding)
|
||||||
# This will use more VRAM!
|
# This will use more VRAM!
|
||||||
draft_model:
|
draft_model:
|
||||||
|
|||||||
@@ -106,7 +106,6 @@ class ModelLoadRequest(BaseModel):
|
|||||||
chunk_size: Optional[int] = None
|
chunk_size: Optional[int] = None
|
||||||
prompt_template: Optional[str] = None
|
prompt_template: Optional[str] = None
|
||||||
num_experts_per_token: Optional[int] = None
|
num_experts_per_token: Optional[int] = None
|
||||||
fasttensors: Optional[bool] = None
|
|
||||||
|
|
||||||
# Non-config arguments
|
# Non-config arguments
|
||||||
draft: Optional[DraftModelLoadRequest] = None
|
draft: Optional[DraftModelLoadRequest] = None
|
||||||
|
|||||||
Reference in New Issue
Block a user