Mirror of https://github.com/theroyallab/tabbyAPI.git
Model: Add support for num_experts_per_token
New parameter that is safe to set in exllamav2 v0.0.11. Only recommended for people who know what they're doing.

Signed-off-by: kingbri <bdashore3@proton.me>
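For anyone wanting to try it, a hedged usage sketch (not taken from the repo): it assumes ModelContainer is importable from model.py and accepts a model directory plus keyword overrides. Only the num_experts_per_token name comes from the diff below; the model path and everything else here is a hypothetical placeholder.

    from model import ModelContainer

    # Route each token through 2 experts instead of the model default.
    # The directory path below is a hypothetical placeholder.
    container = ModelContainer(
        "/models/Mixtral-8x7B-exl2",
        num_experts_per_token=2,
    )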
 model.py | 8 ++++++++
 1 file changed, 8 insertions(+)
@@ -105,6 +105,14 @@ class ModelContainer:
         # Set prompt template override if provided
         self.prompt_template = kwargs.get("prompt_template")
 
+        # Set num of experts per token if provided
+        num_experts_override = kwargs.get("num_experts_per_token")
+        if num_experts_override:
+            if hasattr(self.config, "num_experts_per_token"):
+                self.config.num_experts_per_token = num_experts_override
+            else:
+                print(" !! Warning: Currently installed ExLlamaV2 does not support overriding MoE experts")
+
         chunk_size = min(unwrap(kwargs.get("chunk_size"), 2048), self.config.max_seq_len)
         self.config.max_input_len = chunk_size
         self.config.max_attn_size = chunk_size ** 2
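The hasattr guard is what makes the new parameter safe across exllamav2 versions: per the commit message, the config attribute exists from v0.0.11 onward, so older installs get a warning instead of an AttributeError. Below is a self-contained sketch of that guard pattern; _StubConfig is invented here to stand in for exllamav2's real config object.

    class _StubConfig:
        # Attribute present only on newer (v0.0.11+) config objects
        num_experts_per_token = 2

    def apply_expert_override(config, num_experts_override):
        # Apply the override only when the installed library supports it
        if num_experts_override:
            if hasattr(config, "num_experts_per_token"):
                config.num_experts_per_token = num_experts_override
            else:
                print(" !! Warning: Currently installed ExLlamaV2 does not "
                      "support overriding MoE experts")

    config = _StubConfig()
    apply_expert_override(config, 1)
    assert config.num_experts_per_token == 1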