mirror of https://github.com/theroyallab/tabbyAPI.git
Config: Fix descriptions
Appending lines also requires a space between each one; otherwise they'll squish together.

Signed-off-by: kingbri <bdashore3@proton.me>
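The underlying issue is Python's implicit concatenation of adjacent string literals: the interpreter joins them with no separator, so each continued line must carry an explicit trailing space. A minimal, self-contained sketch of the behavior, using a description string from the diff below:

# Adjacent string literals are joined at compile time with no separator.
squished = (
    "Overrides the directory to look for models (default: models). Windows"
    "users, do NOT put this path in quotes."
)
spaced = (
    "Overrides the directory to look for models (default: models). Windows "
    "users, do NOT put this path in quotes."
)
print(squished)  # "...(default: models). Windowsusers, do NOT put..."
print(spaced)    # "...(default: models). Windows users, do NOT put..."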
@@ -40,133 +40,133 @@ class ModelConfig(BaseModel):
     model_dir: str = Field(
         "models",
         description=(
-            "Overrides the directory to look for models (default: models). Windows"
+            "Overrides the directory to look for models (default: models). Windows "
             "users, do NOT put this path in quotes."
         ),
     )
     use_dummy_models: Optional[bool] = Field(
         False,
         description=(
-            "Sends dummy model names when the models endpoint is queried. Enable this"
+            "Sends dummy model names when the models endpoint is queried. Enable this "
             "if looking for specific OAI models."
         ),
     )
     model_name: Optional[str] = Field(
         None,
         description=(
-            "An initial model to load. Make sure the model is located in the model"
+            "An initial model to load. Make sure the model is located in the model "
             "directory! REQUIRED: This must be filled out to load a model on startup."
         ),
     )
     use_as_default: List[str] = Field(
         default_factory=list,
         description=(
-            "Names of args to use as a default fallback for API load requests"
+            "Names of args to use as a default fallback for API load requests "
             "(default: []). Example: ['max_seq_len', 'cache_mode']"
         ),
     )
     max_seq_len: Optional[int] = Field(
         None,
         description=(
-            "Max sequence length. Fetched from the model's base sequence length in"
+            "Max sequence length. Fetched from the model's base sequence length in "
             "config.json by default."
         ),
     )
     override_base_seq_len: Optional[int] = Field(
         None,
         description=(
-            "Overrides base model context length. WARNING: Only use this if the"
+            "Overrides base model context length. WARNING: Only use this if the "
             "model's base sequence length is incorrect."
         ),
     )
     tensor_parallel: Optional[bool] = Field(
         False,
         description=(
-            "Load model with tensor parallelism. Fallback to autosplit if GPU split"
+            "Load model with tensor parallelism. Fallback to autosplit if GPU split "
             "isn't provided."
         ),
     )
     gpu_split_auto: Optional[bool] = Field(
         True,
         description=(
-            "Automatically allocate resources to GPUs (default: True). Not parsed for"
+            "Automatically allocate resources to GPUs (default: True). Not parsed for "
             "single GPU users."
         ),
     )
     autosplit_reserve: List[int] = Field(
         [96],
         description=(
-            "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0)."
+            "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). "
             "Represented as an array of MB per GPU."
         ),
     )
     gpu_split: List[float] = Field(
         default_factory=list,
         description=(
-            "An integer array of GBs of VRAM to split between GPUs (default: [])."
+            "An integer array of GBs of VRAM to split between GPUs (default: []). "
             "Used with tensor parallelism."
         ),
     )
     rope_scale: Optional[float] = Field(
         1.0,
         description=(
-            "Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the"
+            "Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the "
             "model was trained on long context with rope."
         ),
     )
     rope_alpha: Optional[Union[float, str]] = Field(
         1.0,
         description=(
-            "Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto-"
+            "Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto- "
             "calculate."
         ),
     )
     cache_mode: Optional[str] = Field(
         "FP16",
         description=(
-            "Enable different cache modes for VRAM savings (default: FP16). Possible"
+            "Enable different cache modes for VRAM savings (default: FP16). Possible "
             "values: FP16, Q8, Q6, Q4."
         ),
     )
     cache_size: Optional[int] = Field(
         None,
         description=(
-            "Size of the prompt cache to allocate (default: max_seq_len). Must be a"
+            "Size of the prompt cache to allocate (default: max_seq_len). Must be a "
             "multiple of 256."
         ),
     )
     chunk_size: Optional[int] = Field(
         2048,
         description=(
-            "Chunk size for prompt ingestion (default: 2048). A lower value reduces"
+            "Chunk size for prompt ingestion (default: 2048). A lower value reduces "
             "VRAM usage but decreases ingestion speed."
         ),
     )
     max_batch_size: Optional[int] = Field(
         None,
         description=(
-            "Set the maximum number of prompts to process at one time (default:"
+            "Set the maximum number of prompts to process at one time (default: "
             "None/Automatic). Automatically calculated if left blank."
         ),
     )
     prompt_template: Optional[str] = Field(
         None,
         description=(
-            "Set the prompt template for this model. If empty, attempts to look for"
+            "Set the prompt template for this model. If empty, attempts to look for "
             "the model's chat template."
         ),
     )
     num_experts_per_token: Optional[int] = Field(
         None,
         description=(
-            "Number of experts to use per token. Fetched from the model's"
+            "Number of experts to use per token. Fetched from the model's "
             "config.json. For MoE models only."
         ),
     )
     fasttensors: Optional[bool] = Field(
         False,
         description=(
-            "Enables fasttensors to possibly increase model loading speeds (default:"
+            "Enables fasttensors to possibly increase model loading speeds (default: "
             "False)."
         ),
     )
@@ -191,21 +191,21 @@ class DraftModelConfig(BaseModel):
     draft_rope_scale: Optional[float] = Field(
         1.0,
         description=(
-            "Rope scale for draft models (default: 1.0). Same as compress_pos_emb."
+            "Rope scale for draft models (default: 1.0). Same as compress_pos_emb. "
             "Use if the draft model was trained on long context with rope."
         ),
     )
     draft_rope_alpha: Optional[float] = Field(
         None,
         description=(
-            "Rope alpha for draft models (default: None). Same as alpha_value. Leave"
+            "Rope alpha for draft models (default: None). Same as alpha_value. Leave "
             "blank to auto-calculate the alpha value."
         ),
     )
     draft_cache_mode: Optional[str] = Field(
         "FP16",
         description=(
-            "Cache mode for draft models to save VRAM (default: FP16). Possible"
+            "Cache mode for draft models to save VRAM (default: FP16). Possible "
             "values: FP16, Q8, Q6, Q4."
         ),
     )
@@ -225,7 +225,7 @@ class LoraConfig(BaseModel):
     loras: Optional[List[LoraInstanceModel]] = Field(
         None,
         description=(
-            "List of LoRAs to load and associated scaling factors (default scaling:"
+            "List of LoRAs to load and associated scaling factors (default scaling: "
             "1.0)"
         ),
     )
@@ -253,7 +253,7 @@ class DeveloperConfig(BaseModel):
     realtime_process_priority: Optional[bool] = Field(
         False,
         description=(
-            "Set process to use a higher priority For realtime process priority, run"
+            "Set process to use a higher priority For realtime process priority, run "
             "as administrator or sudo Otherwise, the priority will be set to high"
         ),
     )
@@ -269,7 +269,7 @@ class EmbeddingsConfig(BaseModel):
     embeddings_device: Optional[str] = Field(
         "cpu",
         description=(
-            "Device to load embedding models on (default: cpu). Possible values: cpu,"
+            "Device to load embedding models on (default: cpu). Possible values: cpu, "
             "auto, cuda. If using an AMD GPU, set this value to 'cuda'."
         ),
    )
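To confirm that a joined description now renders with the space, the field metadata can be inspected directly. A minimal sketch, assuming Pydantic v2's model_fields API (the class name and field are copied from the first hunk above):

from pydantic import BaseModel, Field

class ModelConfig(BaseModel):
    model_dir: str = Field(
        "models",
        description=(
            "Overrides the directory to look for models (default: models). Windows "
            "users, do NOT put this path in quotes."
        ),
    )

# In Pydantic v2, per-field metadata lives in model_fields.
print(ModelConfig.model_fields["model_dir"].description)
# -> ...(default: models). Windows users, do NOT put this path in quotes.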