Mirror of https://github.com/theroyallab/tabbyAPI.git
API: Fix num_experts_per_token reporting
This wasn't linked to the model config. This value can be 1 if a MoE model isn't loaded.

Signed-off-by: kingbri <bdashore3@proton.me>
@@ -77,7 +77,7 @@ model:
   # NOTE: Only works with chat completion message lists!
   prompt_template:
 
-  # Number of experts to use per token. Loads from the model's config.json if not specified (default: None)
+  # Number of experts to use PER TOKEN. Fetched from the model's config.json if not specified (default: None)
   # WARNING: Don't set this unless you know what you're doing!
   # NOTE: For MoE models (ex. Mixtral) only!
   num_experts_per_token:
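The updated comment says the value is fetched from the model's config.json when the YAML key is left unset. A minimal sketch of that fallback, assuming a Mixtral-style config.json; the helper name and signature here are illustrative, not tabbyAPI's actual code:

import json
import pathlib
from typing import Optional

def load_num_experts_per_token(
    model_dir: pathlib.Path, yaml_value: Optional[int] = None
) -> Optional[int]:
    # An explicit value from the YAML config always wins.
    if yaml_value is not None:
        return yaml_value

    # Otherwise fall back to the checkpoint's config.json. Mixtral-style
    # MoE checkpoints store this as "num_experts_per_tok"; dense models
    # have no such key, so this returns None for them.
    hf_config = json.loads((model_dir / "config.json").read_text())
    return hf_config.get("num_experts_per_tok")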
main.py +1
@@ -111,6 +111,7 @@ async def get_current_model():
             max_seq_len=MODEL_CONTAINER.config.max_seq_len,
             cache_mode="FP8" if MODEL_CONTAINER.cache_fp8 else "FP16",
             prompt_template=prompt_template.name if prompt_template else None,
+            num_experts_per_token=MODEL_CONTAINER.config.num_experts_per_token,
        ),
        logging=gen_logging.PREFERENCES,
    )
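With the field now read from MODEL_CONTAINER.config, the model info endpoint reports the loaded model's real value instead of an unlinked default. A quick way to check, assuming a local tabbyAPI instance on port 5000 and that get_current_model() is served at /v1/model; both the route and the response layout are assumptions based on the ModelCard construction above:

import requests

resp = requests.get("http://localhost:5000/v1/model")
parameters = resp.json()["parameters"]

# Before this fix the field wasn't linked to the model config; after it,
# a dense model should report null/None and a MoE model (e.g. Mixtral)
# its actual experts-per-token count, such as 2.
print(parameters["num_experts_per_token"])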