Update webui to handle reasoning content and include usage stats in server only when requested (#791)

* handle reasoning content in webui

server : include usage statistics only when the user requests them (#16052)

server : only attempt to enable thinking if using jinja (#15967)

* config reasoning_content in webui and change default to auto

---------

Co-authored-by: firecoperana <firecoperana>
2026-04-21 23:19:22 +00:00 · 2025-09-24 00:45:09 -05:00
parent 8b4208e789
commit 17f7f1ed18
7 changed files with 87 additions and 58 deletions
--- a/examples/server/webui/src/Config.ts
+++ b/examples/server/webui/src/Config.ts
@@ -16,6 +16,7 @@ export const CONFIG_DEFAULT = {
  showTokensPerSecond: false,
  showThoughtInProgress: false,
  excludeThoughtOnReq: true,
+  reasoning_format: 'auto',
  // make sure these default values are in sync with `common.h`
  samplers: 'dkypmxnt',
  temperature: 0.8,
@@ -42,6 +43,7 @@ export const CONFIG_DEFAULT = {
  pyIntepreterEnabled: false,
 };
 export const CONFIG_INFO: Record<string, string> = {
+  reasoning_format : 'Specify how to parse reasoning content. none: reasoning content in content block. auto: reasoning content in reasoning_content. ',
  apiKey: 'Set the API Key if you are using --api-key option for the server.',
  systemMessage: 'The starting message that defines how model should behave.',
  samplers:
--- a/examples/server/webui/src/components/SettingDialog.tsx
+++ b/examples/server/webui/src/components/SettingDialog.tsx
@@ -22,6 +22,7 @@ import toast from 'react-hot-toast'
 type SettKey = keyof typeof CONFIG_DEFAULT;

 const BASIC_KEYS: SettKey[] = [
+  'reasoning_format',
  'temperature',
  'top_k',
  'top_p',
--- a/examples/server/webui/src/utils/app.context.tsx
+++ b/examples/server/webui/src/utils/app.context.tsx
@@ -215,7 +215,7 @@ export const AppContextProvider = ({
        messages,
        stream: true,
        cache_prompt: true,
-        reasoning_format: 'none',
+        reasoning_format: config.reasoning_format===''?'auto':config.reasoning_format,
        samplers: config.samplers,
        temperature: config.temperature,
        dynatemp_range: config.dynatemp_range,
@@ -226,7 +226,7 @@ export const AppContextProvider = ({
        typical_p: config.typical_p,
        xtc_probability: config.xtc_probability,
        xtc_threshold: config.xtc_threshold,
-		top_n_sigma: config.top_n_sigma,
+		    top_n_sigma: config.top_n_sigma,
        repeat_last_n: config.repeat_last_n,
        repeat_penalty: config.repeat_penalty,
        presence_penalty: config.presence_penalty,
@@ -257,14 +257,35 @@ export const AppContextProvider = ({
        throw new Error(body?.error?.message || 'Unknown error');
      }
      const chunks = getSSEStreamAsync(fetchResponse);
+      let thinkingTagOpen = false;
      for await (const chunk of chunks) {
        // const stop = chunk.stop;
        if (chunk.error) {
          throw new Error(chunk.error?.message || 'Unknown error');
        }
+        
+        const reasoningContent = chunk.choices?.[0]?.delta?.reasoning_content;
+        if (reasoningContent) {
+          if (pendingMsg.content === null || pendingMsg.content === '') {
+            thinkingTagOpen = true;
+            pendingMsg = {
+              ...pendingMsg,
+              content: '<think>' + reasoningContent,
+            };
+          } else {
+            pendingMsg = {
+              ...pendingMsg,
+              content: pendingMsg.content + reasoningContent,
+            };
+          }
+        }
        const addedContent = chunk.choices?.[0]?.delta?.content;
-        const lastContent = pendingMsg.content || '';
+        let lastContent = pendingMsg.content || '';
        if (addedContent) {
+            if (thinkingTagOpen) {
+              lastContent = lastContent + '</think>';
+              thinkingTagOpen = false;
+            }
          pendingMsg = {
            ...pendingMsg,
            content: lastContent + addedContent,