Config: Fix descriptions

Appending lines also requires a trailing space on each one; otherwise,
consecutive lines will be squished together when concatenated.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri
2024-09-12 22:43:30 -04:00
parent 21747bf9e4
commit d5b3fde319

View File

@@ -40,133 +40,133 @@ class ModelConfig(BaseModel):
model_dir: str = Field( model_dir: str = Field(
"models", "models",
description=( description=(
"Overrides the directory to look for models (default: models). Windows" "Overrides the directory to look for models (default: models). Windows "
"users, do NOT put this path in quotes." "users, do NOT put this path in quotes."
), ),
) )
use_dummy_models: Optional[bool] = Field( use_dummy_models: Optional[bool] = Field(
False, False,
description=( description=(
"Sends dummy model names when the models endpoint is queried. Enable this" "Sends dummy model names when the models endpoint is queried. Enable this "
"if looking for specific OAI models." "if looking for specific OAI models."
), ),
) )
model_name: Optional[str] = Field( model_name: Optional[str] = Field(
None, None,
description=( description=(
"An initial model to load. Make sure the model is located in the model" "An initial model to load. Make sure the model is located in the model "
"directory! REQUIRED: This must be filled out to load a model on startup." "directory! REQUIRED: This must be filled out to load a model on startup."
), ),
) )
use_as_default: List[str] = Field( use_as_default: List[str] = Field(
default_factory=list, default_factory=list,
description=( description=(
"Names of args to use as a default fallback for API load requests" "Names of args to use as a default fallback for API load requests "
"(default: []). Example: ['max_seq_len', 'cache_mode']" "(default: []). Example: ['max_seq_len', 'cache_mode']"
), ),
) )
max_seq_len: Optional[int] = Field( max_seq_len: Optional[int] = Field(
None, None,
description=( description=(
"Max sequence length. Fetched from the model's base sequence length in" "Max sequence length. Fetched from the model's base sequence length in "
"config.json by default." "config.json by default."
), ),
) )
override_base_seq_len: Optional[int] = Field( override_base_seq_len: Optional[int] = Field(
None, None,
description=( description=(
"Overrides base model context length. WARNING: Only use this if the" "Overrides base model context length. WARNING: Only use this if the "
"model's base sequence length is incorrect." "model's base sequence length is incorrect."
), ),
) )
tensor_parallel: Optional[bool] = Field( tensor_parallel: Optional[bool] = Field(
False, False,
description=( description=(
"Load model with tensor parallelism. Fallback to autosplit if GPU split" "Load model with tensor parallelism. Fallback to autosplit if GPU split "
"isn't provided." "isn't provided."
), ),
) )
gpu_split_auto: Optional[bool] = Field( gpu_split_auto: Optional[bool] = Field(
True, True,
description=( description=(
"Automatically allocate resources to GPUs (default: True). Not parsed for" "Automatically allocate resources to GPUs (default: True). Not parsed for "
"single GPU users." "single GPU users."
), ),
) )
autosplit_reserve: List[int] = Field( autosplit_reserve: List[int] = Field(
[96], [96],
description=( description=(
"Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0)." "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). "
"Represented as an array of MB per GPU." "Represented as an array of MB per GPU."
), ),
) )
gpu_split: List[float] = Field( gpu_split: List[float] = Field(
default_factory=list, default_factory=list,
description=( description=(
"An integer array of GBs of VRAM to split between GPUs (default: [])." "An integer array of GBs of VRAM to split between GPUs (default: []). "
"Used with tensor parallelism." "Used with tensor parallelism."
), ),
) )
rope_scale: Optional[float] = Field( rope_scale: Optional[float] = Field(
1.0, 1.0,
description=( description=(
"Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the" "Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the "
"model was trained on long context with rope." "model was trained on long context with rope."
), ),
) )
rope_alpha: Optional[Union[float, str]] = Field( rope_alpha: Optional[Union[float, str]] = Field(
1.0, 1.0,
description=( description=(
"Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto-" "Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto- "
"calculate." "calculate."
), ),
) )
cache_mode: Optional[str] = Field( cache_mode: Optional[str] = Field(
"FP16", "FP16",
description=( description=(
"Enable different cache modes for VRAM savings (default: FP16). Possible" "Enable different cache modes for VRAM savings (default: FP16). Possible "
"values: FP16, Q8, Q6, Q4." "values: FP16, Q8, Q6, Q4."
), ),
) )
cache_size: Optional[int] = Field( cache_size: Optional[int] = Field(
None, None,
description=( description=(
"Size of the prompt cache to allocate (default: max_seq_len). Must be a" "Size of the prompt cache to allocate (default: max_seq_len). Must be a "
"multiple of 256." "multiple of 256."
), ),
) )
chunk_size: Optional[int] = Field( chunk_size: Optional[int] = Field(
2048, 2048,
description=( description=(
"Chunk size for prompt ingestion (default: 2048). A lower value reduces" "Chunk size for prompt ingestion (default: 2048). A lower value reduces "
"VRAM usage but decreases ingestion speed." "VRAM usage but decreases ingestion speed."
), ),
) )
max_batch_size: Optional[int] = Field( max_batch_size: Optional[int] = Field(
None, None,
description=( description=(
"Set the maximum number of prompts to process at one time (default:" "Set the maximum number of prompts to process at one time (default: "
"None/Automatic). Automatically calculated if left blank." "None/Automatic). Automatically calculated if left blank."
), ),
) )
prompt_template: Optional[str] = Field( prompt_template: Optional[str] = Field(
None, None,
description=( description=(
"Set the prompt template for this model. If empty, attempts to look for" "Set the prompt template for this model. If empty, attempts to look for "
"the model's chat template." "the model's chat template."
), ),
) )
num_experts_per_token: Optional[int] = Field( num_experts_per_token: Optional[int] = Field(
None, None,
description=( description=(
"Number of experts to use per token. Fetched from the model's" "Number of experts to use per token. Fetched from the model's "
"config.json. For MoE models only." "config.json. For MoE models only."
), ),
) )
fasttensors: Optional[bool] = Field( fasttensors: Optional[bool] = Field(
False, False,
description=( description=(
"Enables fasttensors to possibly increase model loading speeds (default:" "Enables fasttensors to possibly increase model loading speeds (default: "
"False)." "False)."
), ),
) )
@@ -191,21 +191,21 @@ class DraftModelConfig(BaseModel):
draft_rope_scale: Optional[float] = Field( draft_rope_scale: Optional[float] = Field(
1.0, 1.0,
description=( description=(
"Rope scale for draft models (default: 1.0). Same as compress_pos_emb." "Rope scale for draft models (default: 1.0). Same as compress_pos_emb. "
"Use if the draft model was trained on long context with rope." "Use if the draft model was trained on long context with rope."
), ),
) )
draft_rope_alpha: Optional[float] = Field( draft_rope_alpha: Optional[float] = Field(
None, None,
description=( description=(
"Rope alpha for draft models (default: None). Same as alpha_value. Leave" "Rope alpha for draft models (default: None). Same as alpha_value. Leave "
"blank to auto-calculate the alpha value." "blank to auto-calculate the alpha value."
), ),
) )
draft_cache_mode: Optional[str] = Field( draft_cache_mode: Optional[str] = Field(
"FP16", "FP16",
description=( description=(
"Cache mode for draft models to save VRAM (default: FP16). Possible" "Cache mode for draft models to save VRAM (default: FP16). Possible "
"values: FP16, Q8, Q6, Q4." "values: FP16, Q8, Q6, Q4."
), ),
) )
@@ -225,7 +225,7 @@ class LoraConfig(BaseModel):
loras: Optional[List[LoraInstanceModel]] = Field( loras: Optional[List[LoraInstanceModel]] = Field(
None, None,
description=( description=(
"List of LoRAs to load and associated scaling factors (default scaling:" "List of LoRAs to load and associated scaling factors (default scaling: "
"1.0)" "1.0)"
), ),
) )
@@ -253,7 +253,7 @@ class DeveloperConfig(BaseModel):
realtime_process_priority: Optional[bool] = Field( realtime_process_priority: Optional[bool] = Field(
False, False,
description=( description=(
"Set process to use a higher priority For realtime process priority, run" "Set process to use a higher priority For realtime process priority, run "
"as administrator or sudo Otherwise, the priority will be set to high" "as administrator or sudo Otherwise, the priority will be set to high"
), ),
) )
@@ -269,7 +269,7 @@ class EmbeddingsConfig(BaseModel):
embeddings_device: Optional[str] = Field( embeddings_device: Optional[str] = Field(
"cpu", "cpu",
description=( description=(
"Device to load embedding models on (default: cpu). Possible values: cpu," "Device to load embedding models on (default: cpu). Possible values: cpu, "
"auto, cuda. If using an AMD GPU, set this value to 'cuda'." "auto, cuda. If using an AMD GPU, set this value to 'cuda'."
), ),
) )