mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-03-15 00:07:28 +00:00
Config + Endpoints: Make cache_size more prominent
Since cache_size is now a more important parameter for multi-user setups, make it more prominent by placing it directly below max_seq_len.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
@@ -180,6 +180,25 @@ class ModelConfig(BaseConfigModel):
|
||||
),
|
||||
ge=-1,
|
||||
)
|
||||
cache_size: Optional[int] = Field(
|
||||
None,
|
||||
description=(
|
||||
"Size of the prompt cache to allocate (default: max_seq_len).\n"
|
||||
"Must be a multiple of 256 and can't be less than max_seq_len.\n"
|
||||
"For CFG, set this to 2 * max_seq_len."
|
||||
),
|
||||
multiple_of=256,
|
||||
gt=0,
|
||||
)
|
||||
cache_mode: Optional[CACHE_TYPE] = Field(
|
||||
"FP16",
|
||||
description=(
|
||||
"Enable different cache modes for VRAM savings (default: FP16).\n"
|
||||
f"Possible values for exllamav2: {str(CACHE_SIZES)[15:-1]}.\n"
|
||||
"For exllamav3, specify the pair k_bits,v_bits where k_bits and v_bits "
|
||||
"are integers from 2-8 (i.e. 8,8)."
|
||||
),
|
||||
)
|
||||
tensor_parallel: Optional[bool] = Field(
|
||||
False,
|
||||
description=(
|
||||
@@ -236,25 +255,6 @@ class ModelConfig(BaseConfigModel):
|
||||
"or auto-calculate."
|
||||
),
|
||||
)
|
||||
cache_mode: Optional[CACHE_TYPE] = Field(
|
||||
"FP16",
|
||||
description=(
|
||||
"Enable different cache modes for VRAM savings (default: FP16).\n"
|
||||
f"Possible values for exllamav2: {str(CACHE_SIZES)[15:-1]}.\n"
|
||||
"For exllamav3, specify the pair k_bits,v_bits where k_bits and v_bits "
|
||||
"are integers from 2-8 (i.e. 8,8)."
|
||||
),
|
||||
)
|
||||
cache_size: Optional[int] = Field(
|
||||
None,
|
||||
description=(
|
||||
"Size of the prompt cache to allocate (default: max_seq_len).\n"
|
||||
"Must be a multiple of 256 and can't be less than max_seq_len.\n"
|
||||
"For CFG, set this to 2 * max_seq_len."
|
||||
),
|
||||
multiple_of=256,
|
||||
gt=0,
|
||||
)
|
||||
chunk_size: Optional[int] = Field(
|
||||
2048,
|
||||
description=(
|
||||
|
||||
Reference in New Issue
Block a user