fix line lengths

TerminalMan
2024-09-11 21:43:30 +01:00
parent c6f9806ec6
commit 05f1c3e293


@@ -1,248 +1,331 @@
from pydantic import BaseModel, ConfigDict, Field, model_validator
from typing import List, Optional, Union
from common.utils import unwrap


class config_config_model(BaseModel):
    config: Optional[str] = Field(
        None, description=("Path to an overriding config.yml file")
    )


class network_config_model(BaseModel):
    host: Optional[str] = Field("127.0.0.1", description=("The IP to host on"))
    port: Optional[int] = Field(5000, description=("The port to host on"))
    disable_auth: Optional[bool] = Field(
        False, description=("Disable HTTP token authentication with requests")
    )
    send_tracebacks: Optional[bool] = Field(
        False,
        description=("Decide whether to send error tracebacks over the API"),
    )
    api_servers: Optional[List[str]] = Field(
        [
            "OAI",
        ],
        description=("API servers to enable. Options: (OAI, Kobold)"),
    )


class logging_config_model(BaseModel):
    log_prompt: Optional[bool] = Field(False, description=("Enable prompt logging"))
    log_generation_params: Optional[bool] = Field(
        False, description=("Enable generation parameter logging")
    )
    log_requests: Optional[bool] = Field(False, description=("Enable request logging"))


class model_config_model(BaseModel):
    model_dir: str = Field(
        "models",
        description=(
            "Overrides the directory to look for models (default: models). Windows "
            "users, do NOT put this path in quotes."
        ),
    )
    use_dummy_models: Optional[bool] = Field(
        False,
        description=(
            "Sends dummy model names when the models endpoint is queried. Enable "
            "this if looking for specific OAI models."
        ),
    )
    model_name: Optional[str] = Field(
        None,
        description=(
            "An initial model to load. Make sure the model is located in the model "
            "directory! REQUIRED: This must be filled out to load a model on startup."
        ),
    )
    use_as_default: List[str] = Field(
        default_factory=list,
        description=(
            "Names of args to use as a default fallback for API load requests "
            "(default: []). Example: ['max_seq_len', 'cache_mode']"
        ),
    )
    max_seq_len: Optional[int] = Field(
        None,
        description=(
            "Max sequence length. Fetched from the model's base sequence length in "
            "config.json by default."
        ),
    )
    override_base_seq_len: Optional[int] = Field(
        None,
        description=(
            "Overrides base model context length. WARNING: Only use this if the "
            "model's base sequence length is incorrect."
        ),
    )
    tensor_parallel: Optional[bool] = Field(
        False,
        description=(
            "Load model with tensor parallelism. Fallback to autosplit if GPU split "
            "isn't provided."
        ),
    )
    gpu_split_auto: Optional[bool] = Field(
        True,
        description=(
            "Automatically allocate resources to GPUs (default: True). Not parsed "
            "for single GPU users."
        ),
    )
    autosplit_reserve: List[int] = Field(
        [96],
        description=(
            "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). "
            "Represented as an array of MB per GPU."
        ),
    )
    gpu_split: List[float] = Field(
        default_factory=list,
        description=(
            "An integer array of GBs of VRAM to split between GPUs (default: []). "
            "Used with tensor parallelism."
        ),
    )
    rope_scale: Optional[float] = Field(
        1.0,
        description=(
            "Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the "
            "model was trained on long context with rope."
        ),
    )
    rope_alpha: Optional[Union[float, str]] = Field(
        1.0,
        description=(
            "Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto-"
            "calculate."
        ),
    )
    cache_mode: Optional[str] = Field(
        "FP16",
        description=(
            "Enable different cache modes for VRAM savings (default: FP16). "
            "Possible values: FP16, Q8, Q6, Q4."
        ),
    )
    cache_size: Optional[int] = Field(
        None,
        description=(
            "Size of the prompt cache to allocate (default: max_seq_len). Must be a "
            "multiple of 256."
        ),
    )
    chunk_size: Optional[int] = Field(
        2048,
        description=(
            "Chunk size for prompt ingestion (default: 2048). A lower value reduces "
            "VRAM usage but decreases ingestion speed."
        ),
    )
    max_batch_size: Optional[int] = Field(
        None,
        description=(
            "Set the maximum number of prompts to process at one time (default: "
            "None/Automatic). Automatically calculated if left blank."
        ),
    )
    prompt_template: Optional[str] = Field(
        None,
        description=(
            "Set the prompt template for this model. If empty, attempts to look for "
            "the model's chat template."
        ),
    )
    num_experts_per_token: Optional[int] = Field(
        None,
        description=(
            "Number of experts to use per token. Fetched from the model's "
            "config.json. For MoE models only."
        ),
    )
    fasttensors: Optional[bool] = Field(
        False,
        description=(
            "Enables fasttensors to possibly increase model loading speeds "
            "(default: False)."
        ),
    )


class draft_model_config_model(BaseModel):
    draft_model_dir: Optional[str] = Field(
        "models",
        description=(
            "Overrides the directory to look for draft models (default: models)"
        ),
    )
    draft_model_name: Optional[str] = Field(
        None,
        description=(
            "An initial draft model to load. Ensure the model is in the model "
            "directory."
        ),
    )
    draft_rope_scale: Optional[float] = Field(
        1.0,
        description=(
            "Rope scale for draft models (default: 1.0). Same as compress_pos_emb. "
            "Use if the draft model was trained on long context with rope."
        ),
    )
    draft_rope_alpha: Optional[float] = Field(
        None,
        description=(
            "Rope alpha for draft models (default: None). Same as alpha_value. "
            "Leave blank to auto-calculate the alpha value."
        ),
    )
    draft_cache_mode: Optional[str] = Field(
        "FP16",
        description=(
            "Cache mode for draft models to save VRAM (default: FP16). Possible "
            "values: FP16, Q8, Q6, Q4."
        ),
    )


class lora_instance_model(BaseModel):
    name: str = Field(..., description=("Name of the LoRA model"))
    scaling: float = Field(
        1.0, description=("Scaling factor for the LoRA model (default: 1.0)")
    )


class lora_config_model(BaseModel):
    lora_dir: Optional[str] = Field(
        "loras", description=("Directory to look for LoRAs (default: 'loras')")
    )
    loras: Optional[List[lora_instance_model]] = Field(
        None,
        description=(
            "List of LoRAs to load and associated scaling factors "
            "(default scaling: 1.0)"
        ),
    )


class sampling_config_model(BaseModel):
    override_preset: Optional[str] = Field(
        None, description=("Select a sampler override preset")
    )


class developer_config_model(BaseModel):
    unsafe_launch: Optional[bool] = Field(
        False, description=("Skip Exllamav2 version check")
    )
    disable_request_streaming: Optional[bool] = Field(
        False, description=("Disables API request streaming")
    )
    cuda_malloc_backend: Optional[bool] = Field(
        False, description=("Runs with the pytorch CUDA malloc backend")
    )
    uvloop: Optional[bool] = Field(
        False, description=("Run asyncio using Uvloop or Winloop")
    )
    realtime_process_priority: Optional[bool] = Field(
        False,
        description=(
            "Set process to use a higher priority. For realtime process priority, "
            "run as administrator or sudo. Otherwise, the priority will be set to "
            "high."
        ),
    )


class embeddings_config_model(BaseModel):
    embedding_model_dir: Optional[str] = Field(
        "models",
        description=(
            "Overrides directory to look for embedding models (default: models)"
        ),
    )
    embeddings_device: Optional[str] = Field(
        "cpu",
        description=(
            "Device to load embedding models on (default: cpu). Possible values: "
            "cpu, auto, cuda. If using an AMD GPU, set this value to 'cuda'."
        ),
    )
    embedding_model_name: Optional[str] = Field(
        None, description=("The embeddings model to load")
    )


class tabby_config_model(BaseModel):
    config: config_config_model = Field(default_factory=config_config_model)
    network: network_config_model = Field(default_factory=network_config_model)
    logging: logging_config_model = Field(default_factory=logging_config_model)
    model: model_config_model = Field(default_factory=model_config_model)
    draft_model: draft_model_config_model = Field(
        default_factory=draft_model_config_model
    )
    lora: lora_config_model = Field(default_factory=lora_config_model)
    sampling: sampling_config_model = Field(default_factory=sampling_config_model)
    developer: developer_config_model = Field(default_factory=developer_config_model)
    embeddings: embeddings_config_model = Field(default_factory=embeddings_config_model)

    @model_validator(mode="before")
    def set_defaults(cls, values):
        # Replace sections that were explicitly set to None with
        # fully-defaulted instances of the corresponding sub-model
        for field_name, field_value in values.items():
            if field_value is None:
                default_instance = cls.__annotations__[field_name]().dict()
                values[field_name] = cls.__annotations__[field_name](**default_instance)
        return values

    model_config = ConfigDict(validate_assignment=True)


def generate_config_file(filename="config_sample.yml", indentation=2):
    schema = tabby_config_model.model_json_schema()

    def dump_def(id: str, indent=2):
        # Render one section of the sample config from the schema's $defs
        yaml = ""
        indent = " " * indentation * indent
        id = id.split("/")[-1]

        section = schema["$defs"][id]["properties"]
        for property in section.keys():
            # Emit each property's description as a comment, then its default
            comment = section[property]["description"]
            yaml += f"{indent}# {comment}\n"

            value = unwrap(section[property].get("default"), "")
            yaml += f"{indent}{property}: {value}\n\n"

        return yaml + "\n"

    yaml = ""
    for section in schema["properties"].keys():
        yaml += f"{section}:\n"
        yaml += dump_def(schema["properties"][section]["$ref"])
        yaml += "\n"

    with open(filename, "w") as f:
        f.write(yaml)


# generate_config_file("test.yml")
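
A note on the wrapping style this commit introduces: Python joins adjacent string literals with no separator, so every wrapped description fragment needs an explicit space at the break or words run together. A minimal sketch of the pitfall, adapted from the model_dir description above:

# Adjacent string literals concatenate with no separator between them.
broken = (
    "Overrides the directory to look for models (default: models). Windows"
    "users, do NOT put this path in quotes."
)
assert "Windowsusers" in broken  # words fuse at the join

fixed = (
    "Overrides the directory to look for models (default: models). Windows "
    "users, do NOT put this path in quotes."
)
assert "Windows users" in fixed  # trailing space keeps the text intact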
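For context, a minimal usage sketch of the models defined above. The import path common.config_models is an assumption inferred from the common.utils import, not confirmed by this commit; adjust it to wherever this module actually lives:

# Hypothetical import path -- assumed for illustration only.
from common.config_models import generate_config_file, tabby_config_model

# Write a commented sample YAML derived from the pydantic JSON schema.
generate_config_file("config_sample.yml")

# Instantiating the root model with no arguments yields validated defaults.
config = tabby_config_model()
print(config.network.host, config.network.port)  # 127.0.0.1 5000
print(config.model.cache_mode)  # FP16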