Config + Endpoints: Make cache_size more prominent

Since cache_size is now a more important parameter for multi-user setups, make it more prominent by placing it below max_seq_len.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
2026-03-15 00:07:28 +00:00 · 2025-10-14 21:53:33 -04:00
parent 62e9fa217a
commit 69a25d7fa6
3 changed files with 31 additions and 31 deletions
--- a/common/config_models.py
+++ b/common/config_models.py
@@ -180,6 +180,25 @@ class ModelConfig(BaseConfigModel):
        ),
        ge=-1,
    )
+    cache_size: Optional[int] = Field(
+        None,
+        description=(
+            "Size of the prompt cache to allocate (default: max_seq_len).\n"
+            "Must be a multiple of 256 and can't be less than max_seq_len.\n"
+            "For CFG, set this to 2 * max_seq_len."
+        ),
+        multiple_of=256,
+        gt=0,
+    )
+    cache_mode: Optional[CACHE_TYPE] = Field(
+        "FP16",
+        description=(
+            "Enable different cache modes for VRAM savings (default: FP16).\n"
+            f"Possible values for exllamav2: {str(CACHE_SIZES)[15:-1]}.\n"
+            "For exllamav3, specify the pair k_bits,v_bits where k_bits and v_bits "
+            "are integers from 2-8 (i.e. 8,8)."
+        ),
+    )
    tensor_parallel: Optional[bool] = Field(
        False,
        description=(
@@ -236,25 +255,6 @@ class ModelConfig(BaseConfigModel):
            "or auto-calculate."
        ),
    )
-    cache_mode: Optional[CACHE_TYPE] = Field(
-        "FP16",
-        description=(
-            "Enable different cache modes for VRAM savings (default: FP16).\n"
-            f"Possible values for exllamav2: {str(CACHE_SIZES)[15:-1]}.\n"
-            "For exllamav3, specify the pair k_bits,v_bits where k_bits and v_bits "
-            "are integers from 2-8 (i.e. 8,8)."
-        ),
-    )
-    cache_size: Optional[int] = Field(
-        None,
-        description=(
-            "Size of the prompt cache to allocate (default: max_seq_len).\n"
-            "Must be a multiple of 256 and can't be less than max_seq_len.\n"
-            "For CFG, set this to 2 * max_seq_len."
-        ),
-        multiple_of=256,
-        gt=0,
-    )
    chunk_size: Optional[int] = Field(
        2048,
        description=(