Merge pull request #254 from lucyknada/main

add draft_gpu_split option for spec decoding
2026-03-14 15:57:27 +00:00 · 2025-02-11 16:48:03 -05:00
parent e290b88568 beb6d8faa5
commit 2e491472d1
3 changed files with 59 additions and 16 deletions
--- a/common/config_models.py
+++ b/common/config_models.py
@@ -351,6 +351,13 @@ class DraftModelConfig(BaseConfigModel):
            f"Possible values: {str(CACHE_SIZES)[15:-1]}."
        ),
    )
+    draft_gpu_split: List[float] = Field(
+        default_factory=list,
+        description=(
+            "An integer array of GBs of VRAM to split between GPUs (default: []).\n"
+            "If this isn't filled in, the draft model is autosplit."
+        ),
+    )


 class LoraInstanceModel(BaseConfigModel):