Merge pull request #254 from lucyknada/main

add draft_gpu_split option for spec decoding
This commit is contained in:
Brian
2025-02-11 16:48:03 -05:00
committed by GitHub
3 changed files with 59 additions and 16 deletions

View File

@@ -351,6 +351,13 @@ class DraftModelConfig(BaseConfigModel):
f"Possible values: {str(CACHE_SIZES)[15:-1]}."
),
)
# Per-GPU VRAM allotment (in GB) for the draft model. An empty list (the
# default) leaves the split to autosplit, per the description below.
# The description avoids saying "integer" because the declared element
# type is float, so fractional GB values pass validation.
draft_gpu_split: List[float] = Field(
    default_factory=list,
    description=(
        "An array of GBs of VRAM to split between GPUs (default: []).\n"
        "If this isn't filled in, the draft model is autosplit."
    ),
)
class LoraInstanceModel(BaseConfigModel):