From 3622710582a8c286b6df6e402dfd279114a668ef Mon Sep 17 00:00:00 2001
From: kingbri <bdashore3@proton.me>
Date: Thu, 28 Dec 2023 00:31:14 -0500
Subject: [PATCH] API: Fix num_experts_per_token reporting

The num_experts_per_token value reported by get_current_model wasn't
linked to the model config. Note that this value can be 1 if a MoE
model isn't loaded.

Signed-off-by: kingbri <bdashore3@proton.me>
---
 config_sample.yml | 2 +-
 main.py           | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/config_sample.yml b/config_sample.yml
index 435e91d..f3ca708 100644
--- a/config_sample.yml
+++ b/config_sample.yml
@@ -77,7 +77,7 @@ model:
   # NOTE: Only works with chat completion message lists!
   prompt_template:
 
-  # Number of experts to use per token. Loads from the model's config.json if not specified (default: None)
+  # Number of experts to use PER TOKEN. Fetched from the model's config.json if not specified (default: None)
   # WARNING: Don't set this unless you know what you're doing!
   # NOTE: For MoE models (ex. Mixtral) only!
   num_experts_per_token:
diff --git a/main.py b/main.py
index cd33a49..7f6822f 100644
--- a/main.py
+++ b/main.py
@@ -111,6 +111,7 @@ async def get_current_model():
             max_seq_len=MODEL_CONTAINER.config.max_seq_len,
             cache_mode="FP8" if MODEL_CONTAINER.cache_fp8 else "FP16",
             prompt_template=prompt_template.name if prompt_template else None,
+            num_experts_per_token=MODEL_CONTAINER.config.num_experts_per_token,
         ),
         logging=gen_logging.PREFERENCES,
     )