Mirror of https://github.com/ikawrakow/ik_llama.cpp.git, synced 2026-01-26 17:20:01 +00:00
Webui: add text completions and adaptive_p sampling (#1153)
* Webui: add text completions and adaptive_p sampling
* update description

Co-authored-by: firecoperana <firecoperana>
```diff
@@ -396,7 +396,7 @@ static void sampler_queue(
     const float top_n_sigma = params.top_n_sigma;
 
     const std::vector<llama_sampler_type> & samplers_sequence = params.samplers_sequence;
-
+    bool use_adaptive_p = false; // see below
     for (auto sampler_type : samplers_sequence) {
         switch (sampler_type) {
             case llama_sampler_type::DRY : llama_sample_dry (ctx_main, ctx_sampling->smpl, &cur_p); break;
```
```diff
@@ -416,9 +416,14 @@ static void sampler_queue(
                 llama_sample_temp(ctx_main, &cur_p, temp);
             }
             break;
-            case llama_sampler_type::ADAPTIVE_P: llama_sample_adaptive_p(ctx_main, ctx_sampling->adapt_p_ctx, &cur_p); break;
+            case llama_sampler_type::ADAPTIVE_P: use_adaptive_p = true; break;
             default : break;
         }
     }
 
+    if (use_adaptive_p) {
+        // adaptive p should be applied last, so its position in the sampler order is ignored
+        llama_sample_adaptive_p(ctx_main, ctx_sampling->adapt_p_ctx, &cur_p);
+    }
 }
```
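Deferring `ADAPTIVE_P` in `sampler_queue` means adaptive-p always runs after every other sampler, wherever the user places it in the sampler order. As rough intuition for what the sampler does (the WebUI help text added later in this commit describes it as selecting tokens near a target probability, with `adaptive_decay` controlling how fast the target adapts), here is a minimal TypeScript sketch; the weighting and update rule are illustrative assumptions, not the actual `llama_sample_adaptive_p` implementation.

```ts
// Minimal illustrative sketch only. The real sampler is implemented in C++
// (llama_sample_adaptive_p); the weighting and update rule here are guesses
// based on the adaptive_target / adaptive_decay help text, not the actual math.
interface TokenProb {
  id: number;
  p: number; // post-softmax probability
}

function adaptivePSample(
  candidates: TokenProb[],
  state: { target: number; decay: number },
): number {
  // Favor tokens whose probability is close to the current target.
  const weights = candidates.map((c) => 1 / (1e-6 + Math.abs(c.p - state.target)));
  const total = weights.reduce((a, b) => a + b, 0);

  // Sample from the re-weighted distribution.
  let r = Math.random() * total;
  let chosen = candidates[0];
  for (let i = 0; i < candidates.length; i++) {
    r -= weights[i];
    if (r <= 0) {
      chosen = candidates[i];
      break;
    }
  }

  // Drift the target toward the chosen token's probability; lower decay
  // means faster but less stable adaptation, matching the help text.
  state.target = state.decay * state.target + (1 - state.decay) * chosen.p;
  return chosen.id;
}
```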
```diff
@@ -464,7 +469,7 @@ static llama_token llama_sampling_sample_impl(
     } else if (mirostat == 2) {
         llama_sample_temp(ctx_main, &cur_p, temp);
         id = llama_sample_token_mirostat_v2(ctx_main, &cur_p, mirostat_tau, mirostat_eta, &ctx_sampling->mirostat_mu);
-    } else if (adaptive_target >= 0.0f) {
+    } else if (adaptive_target >= 0.0f && ctx_sampling->adapt_p_ctx != nullptr) {
         // adaptive p sampling
         static thread_local std::vector<float> orig_probs;
         orig_probs.resize(cur_p.size);
```
Binary file not shown.

examples/server/webui/dist/index.html (vendored, 228 lines changed): diff suppressed because one or more lines are too long.
```diff
@@ -12,6 +12,7 @@ export const CONFIG_DEFAULT = {
   // Note: in order not to introduce breaking changes, please keep the same data type (number, string, etc) if you want to change the default value. Do not use null or undefined for default value.
   // Do not use nested objects, keep it single level. Prefix the key if you need to group them.
   apiKey: '',
+  completionType: 'Chat',
   systemMessage: 'You are a helpful assistant.',
   showTokensPerSecond: false,
   showThoughtInProgress: false,
```
```diff
@@ -21,7 +22,7 @@ export const CONFIG_DEFAULT = {
   pdfAsImage: false,
   reasoning_format: 'auto',
   // make sure these default values are in sync with `common.h`
-  samplers: 'dkypmxnt',
+  samplers: 'kdypmxntw',
   temperature: 0.8,
   dynatemp_range: 0.0,
   dynatemp_exponent: 1.0,
```
```diff
@@ -30,6 +31,8 @@ export const CONFIG_DEFAULT = {
   min_p: 0.05,
   xtc_probability: 0.0,
   xtc_threshold: 0.1,
+  adaptive_target: -1.0,
+  adaptive_decay: 0.9,
   top_n_sigma: 0.0,
   typical_p: 1.0,
   repeat_last_n: 64,
```
```diff
@@ -41,6 +44,8 @@ export const CONFIG_DEFAULT = {
   dry_allowed_length: 2,
   dry_penalty_last_n: -1,
   max_tokens: -1,
+  stop_string: '\\n\\n,\\nUser:',
+  prefix_role: 'true',
   custom: '', // custom json-stringified object
   // experimental features
   pyIntepreterEnabled: false,
```
```diff
@@ -48,11 +53,12 @@ export const CONFIG_DEFAULT = {
 export const CONFIG_INFO: Record<string, string> = {
   reasoning_format : 'Specify how to parse reasoning content. none: reasoning content in content block. auto: reasoning content in reasoning_content. ',
   apiKey: 'Set the API Key if you are using --api-key option for the server.',
+  completionType: 'Set completion type to be either chat or text.',
   systemMessage: 'The starting message that defines how model should behave.',
   pasteLongTextToFileLen:
     'On pasting long text, it will be converted to a file. You can control the file length by setting the value of this parameter. Value 0 means disable.',
   samplers:
-    'The order at which samplers are applied, in simplified way. Default is "dkypmxnt": dry->top_k->typ_p->top_p->min_p->xtc->top_sigma->temperature',
+    'The order in which samplers are applied, in simplified form. Default is "kdypmxntw": top_k->dry->typ_p->top_p->min_p->xtc->top_sigma->temperature->adaptive_p',
   temperature:
     'Controls the randomness of the generated text by affecting the probability distribution of the output tokens. Higher = more random, lower = more focused.',
   dynatemp_range:
```
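The samplers string is one letter per sampler. A small hypothetical decoder makes the mapping from the help text explicit (the real parsing happens server-side; this helper is not part of the commit, and the help text abbreviates `top_n_sigma` as `top_sigma`):

```ts
// Hypothetical helper: decodes the WebUI's one-letter sampler string into
// sampler names, following the mapping given for the default "kdypmxntw".
const SAMPLER_LETTERS: Record<string, string> = {
  k: 'top_k',
  d: 'dry',
  y: 'typ_p',
  p: 'top_p',
  m: 'min_p',
  x: 'xtc',
  n: 'top_n_sigma',
  t: 'temperature',
  w: 'adaptive_p',
};

function decodeSamplerOrder(samplers: string): string[] {
  return [...samplers].map((c) => SAMPLER_LETTERS[c] ?? `unknown(${c})`);
}

// decodeSamplerOrder('kdypmxntw')
// => ['top_k', 'dry', 'typ_p', 'top_p', 'min_p', 'xtc',
//     'top_n_sigma', 'temperature', 'adaptive_p']
```

Note that, per the `sampler_queue` change above, `adaptive_p` ('w') is applied last regardless of where it appears in the string.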
```diff
@@ -89,6 +95,10 @@ export const CONFIG_INFO: Record<string, string> = {
     'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets DRY penalty for the last n tokens.',
   max_tokens: 'The maximum number of token per output.',
   useServerDefaults: 'When enabled, skip sending WebUI defaults (e.g., temperature) and use the server\'s default values instead.',
+  stop_string: 'List of stop strings separated by commas. Not applied to chat completions.',
+  prefix_role: 'Whether to add the role at the start of each message. Not applied to chat completions.',
+  adaptive_target: 'Select tokens near this probability (valid range 0.0 to 1.0; <0 = disabled).',
+  adaptive_decay: 'Decay rate for target adaptation over time. Lower values -> faster but less stable adaptation (valid range 0.0 to 1.0; ≤0 = no adaptation).',
   custom: '', // custom json-stringified object
 };
 // config keys having numeric value (i.e. temperature, top_k, top_p, etc)
```
```diff
@@ -1,4 +1,4 @@
-import { useState, useRef} from 'react';
+import { useState, useRef } from 'react';
 import { useAppContext } from '../utils/app.context';
 import { CONFIG_DEFAULT, CONFIG_INFO } from '../Config';
 import StorageUtils from '../utils/storage';
```
```diff
@@ -20,7 +20,10 @@ import toast from 'react-hot-toast'
 
 type SettKey = keyof typeof CONFIG_DEFAULT;
 
 const BASIC_KEYS: SettKey[] = [
+  'prefix_role',
+  'stop_string',
   'reasoning_format',
   'temperature',
   'top_k',
```
```diff
@@ -29,28 +32,32 @@ const BASIC_KEYS: SettKey[] = [
   'max_tokens',
 ];
 const SAMPLER_KEYS: SettKey[] = [
-  'dynatemp_range',
-  'dynatemp_exponent',
-  'typical_p',
+  'top_n_sigma',
+  'adaptive_target',
+  'adaptive_decay',
   'xtc_probability',
   'xtc_threshold',
-  'top_n_sigma'
+  'dynatemp_range',
+  'dynatemp_exponent',
+  'typical_p'
 ];
 const PENALTY_KEYS: SettKey[] = [
-  'repeat_last_n',
-  'repeat_penalty',
-  'presence_penalty',
-  'frequency_penalty',
   'dry_multiplier',
   'dry_base',
   'dry_allowed_length',
   'dry_penalty_last_n',
+  'repeat_last_n',
+  'repeat_penalty',
+  'presence_penalty',
+  'frequency_penalty',
 ];
 
 enum SettingInputType {
   SHORT_INPUT,
   LONG_INPUT,
   CHECKBOX,
+  DROPDOWN,
   CUSTOM,
 }
 
```
```diff
@@ -59,6 +66,7 @@ interface SettingFieldInput {
   label: string | React.ReactElement;
   help?: string | React.ReactElement;
   key: SettKey;
+  options?: string;
 }
 
 interface SettingFieldCustom {
```
```diff
@@ -266,6 +274,12 @@ const SETTING_SECTIONS = (
       label: 'System Message (will be disabled if left empty)',
       key: 'systemMessage',
     },
+    {
+      type: SettingInputType.DROPDOWN,
+      label: 'Completion Type',
+      key: 'completionType',
+      options: 'Chat|Text',
+    },
     ...BASIC_KEYS.map(
       (key) =>
         ({
```
```diff
@@ -693,7 +707,19 @@ export default function SettingDialog({
           label={field.label as string}
         />
       );
-    } else if (field.type === SettingInputType.CUSTOM) {
+    } else if (field.type === SettingInputType.DROPDOWN) {
+      return (
+        <SettingsModalDropdown
+          key={key}
+          configKey={field.key}
+          value={localConfig[field.key].toString()}
+          onChange={onChange(field.key)}
+          label={field.label as string}
+          options={field.options ? field.options : ''}
+        />
+      );
+    } else if (field.type === SettingInputType.CUSTOM) {
       return (
         <div key={key} className="mb-2">
           {typeof field.component === 'string'
```
```diff
@@ -823,3 +849,41 @@ function SettingsModalCheckbox({
     </div>
   );
 }
+
+function SettingsModalDropdown({
+  configKey,
+  value,
+  onChange,
+  label,
+  options,
+}: {
+  configKey: SettKey;
+  value: string; // should be one of the option values, e.g. "Chat" or "Text"
+  onChange: (value: string) => void;
+  label: string;
+  options: string; // pipe-separated option list, e.g. "Chat|Text"
+}) {
+  const parts = options.split('|');
+  // Membership must be tested against the parsed parts, not the raw string
+  // (options.includes would do a substring match, and options[0] is a single
+  // character); fall back to the first option when the value is invalid.
+  const selectedValue = parts.includes(value) ? value : (parts[0] || '');
+
+  return (
+    <div className="flex flex-row items-center mb-2">
+      <div className="flex flex-col w-full">
+        <label className="mb-1 text-sm font-medium text-gray-700">
+          {label || configKey}
+        </label>
+        <select
+          className="select select-bordered w-full max-w-xs"
+          value={selectedValue}
+          onChange={(e) => onChange(e.target.value)}
+        >
+          {parts.map((opt) => (
+            <option key={opt} value={opt}>
+              {opt}
+            </option>
+          ))}
+        </select>
+      </div>
+    </div>
+  );
+}
```
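A tiny illustrative check of the pipe-separated options contract, and why membership has to be tested against the parsed parts rather than the raw string:

```ts
// The dropdown's options prop is one pipe-separated string.
const options = 'Chat|Text';
const parts = options.split('|'); // ['Chat', 'Text']

// On the raw string, includes() does a substring match and indexing
// yields single characters, so both give misleading results:
options.includes('at'); // true  ('Chat' contains the substring 'at')
parts.includes('at');   // false (correct: 'at' is not an option)
options[0];             // 'C'    (first character only)
parts[0];               // 'Chat' (correct fallback value)
```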
```diff
@@ -12,6 +12,7 @@ import StorageUtils from './storage';
 import {
   filterThoughtFromMsgs,
   normalizeMsgsForAPI,
+  normalizeMsgsForTextAPI,
   getSSEStreamAsync,
   getServerProps,
 } from './misc';
```
```diff
@@ -231,14 +232,24 @@ export const AppContextProvider = ({
         : [{ role: 'system', content: config.systemMessage } as APIMessage]),
       ...normalizeMsgsForAPI(currMessages),
     ];
+    let prompt = '';
     if (config.excludeThoughtOnReq) {
       messages = filterThoughtFromMsgs(messages);
     }
+    const isText = config.completionType === 'Text';
+    if (isText) {
+      prompt = normalizeMsgsForTextAPI(messages, config.prefix_role === 'true');
+    }
     if (isDev) console.log({ messages });
 
     // prepare params
+    // decode escaped "\n" sequences in the comma-separated stop_string via
+    // JSON.parse, falling back to the default list when the field is empty
+    const jsonString = `"${config.stop_string}"`;
+    let stop_list = JSON.parse(jsonString).split(',');
+    if (stop_list.length === 1 && stop_list[0] === '') {
+      stop_list = '\n\n,\nUser:'.split(',');
+    }
     const params = {
-      messages,
+      ...(isText ? { prompt } : { messages }),
       stream: true,
       cache_prompt: true,
       reasoning_format: config.reasoning_format===''?'auto':config.reasoning_format,
```
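What the `stop_string` round-trip produces for the default value, evaluated by hand (this mirrors the parsing code above):

```ts
// The settings field stores the escaped text the user typed:
const stop_string = '\\n\\n,\\nUser:'; // i.e. backslash-n, not real newlines

// Quoting it and running JSON.parse decodes the escapes into newlines:
const decoded = JSON.parse(`"${stop_string}"`); // '\n\n,\nUser:'

// Splitting on commas gives the stop list sent with text completions:
const stop_list = decoded.split(','); // ['\n\n', '\nUser:']
```

A side effect of the comma split is that a stop string containing a literal comma cannot be expressed.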
```diff
@@ -257,7 +268,10 @@ export const AppContextProvider = ({
       dry_allowed_length: config.dry_allowed_length,
       dry_penalty_last_n: config.dry_penalty_last_n,
       max_tokens: config.max_tokens,
+      adaptive_target: config.adaptive_target,
+      adaptive_decay: config.adaptive_decay,
       timings_per_token: !!config.showTokensPerSecond,
+      ...(isText ? { stop: stop_list } : {}),
       ...(config.useServerDefaults ? {} :{
         temperature: config.temperature,
         top_k: config.top_k,
```
```diff
@@ -269,7 +283,11 @@ export const AppContextProvider = ({
     };
 
     // send request
-    const fetchResponse = await fetch(`${BASE_URL}/v1/chat/completions`, {
+    let url = `${BASE_URL}/v1/chat/completions`;
+    if (isText) {
+      url = `${BASE_URL}/v1/completions`;
+    }
+    const fetchResponse = await fetch(url, {
       method: 'POST',
       headers: {
         'Content-Type': 'application/json',
```
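A sketch of the two request bodies this code ends up sending, trimmed to the fields relevant here and filled with the `CONFIG_DEFAULT` values from earlier in the commit (the prompt string assumes `prefix_role` is `'true'`):

```ts
// completionType 'Chat' -> POST {BASE_URL}/v1/chat/completions
const chatBody = {
  messages: [
    { role: 'system', content: 'You are a helpful assistant.' },
    { role: 'user', content: 'Hello' },
  ],
  stream: true,
  cache_prompt: true,
  adaptive_target: -1.0, // < 0 leaves adaptive-p disabled
  adaptive_decay: 0.9,
};

// completionType 'Text' -> POST {BASE_URL}/v1/completions
// (prompt built by normalizeMsgsForTextAPI; stop parsed from stop_string)
const textBody = {
  prompt: 'You are a helpful assistant.\n***\nUser:Hello\nAssistant:',
  stream: true,
  cache_prompt: true,
  adaptive_target: -1.0,
  adaptive_decay: 0.9,
  stop: ['\n\n', '\nUser:'],
};
```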
```diff
@@ -291,23 +309,29 @@ export const AppContextProvider = ({
         if (chunk.error) {
           throw new Error(chunk.error?.message || 'Unknown error');
         }
-        const reasoningContent = chunk.choices?.[0]?.delta?.reasoning_content;
-        if (reasoningContent) {
-          if (pendingMsg.content === null || pendingMsg.content === '') {
-            thinkingTagOpen = true;
-            pendingMsg = {
-              ...pendingMsg,
-              content: '<think>' + reasoningContent,
-            };
-          } else {
-            pendingMsg = {
-              ...pendingMsg,
-              content: pendingMsg.content + reasoningContent,
-            };
-          }
-        }
-        const addedContent = chunk.choices?.[0]?.delta?.content;
+        let addedContent = '';
+        if (!isText) {
+          const reasoningContent = chunk.choices?.[0]?.delta?.reasoning_content;
+          if (reasoningContent) {
+            if (pendingMsg.content === null || pendingMsg.content === '') {
+              thinkingTagOpen = true;
+              pendingMsg = {
+                ...pendingMsg,
+                content: '<think>' + reasoningContent,
+              };
+            } else {
+              pendingMsg = {
+                ...pendingMsg,
+                content: pendingMsg.content + reasoningContent,
+              };
+            }
+          }
+          addedContent = chunk.choices?.[0]?.delta?.content;
+        } else {
+          addedContent = chunk.choices?.[0]?.text;
+        }
         let lastContent = pendingMsg.content || '';
         if (addedContent) {
           if (thinkingTagOpen) {
```
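The handler now distinguishes two streaming chunk shapes (illustrative payloads; real SSE chunks carry additional fields such as id, model, and finish_reason):

```ts
// /v1/chat/completions stream: text arrives under choices[0].delta.content,
// and reasoning models may also stream choices[0].delta.reasoning_content.
const chatChunk = {
  choices: [{ delta: { content: 'Hello', reasoning_content: 'thinking...' } }],
};

// /v1/completions stream: text arrives under choices[0].text, with no
// reasoning_content channel, which is why the handler skips the <think>
// bookkeeping when isText is set.
const textChunk = {
  choices: [{ text: 'Hello' }],
};

// Mirrors the branch above:
// addedContent = isText ? chunk.choices?.[0]?.text
//                       : chunk.choices?.[0]?.delta?.content;
```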
```diff
@@ -95,7 +95,6 @@ export function normalizeMsgsForAPI(messages: Readonly<Message[]>) {
       throw new Error('Unknown extra type');
     }
   }
-
   // add user message to the end
   contentArr.push({
     type: 'text',
```
```diff
@@ -109,6 +108,61 @@ export function normalizeMsgsForAPI(messages: Readonly<Message[]>) {
   }) as APIMessage[];
 }
 
+export function GetFileContentForTextAPI(msg: APIMessage) {
+  let content = msg.content;
+  if (typeof msg.content !== 'string') {
+    const content_list = msg.content as APIMessageContentPart[];
+    // keep only the text parts; non-text extras contribute nothing
+    const extra_list = content_list.map((extr) =>
+      extr.type === 'text' ? extr.text : ''
+    );
+    content = extra_list.join('\n');
+  }
+  return content;
+}
+
+export function normalizeMsgsForTextAPI(messages: Readonly<APIMessage[]>, append: boolean) {
+  let prompt = '';
+  if (append) {
+    // prefix each turn with its role and close with an open assistant turn
+    const newMessages = messages.map((msg) => {
+      const content = GetFileContentForTextAPI(msg);
+      if (msg.role === 'system') {
+        return content + '\n***';
+      } else if (msg.role === 'assistant') {
+        return 'Assistant:' + content;
+      } else if (msg.role === 'user') {
+        return 'User:' + content;
+      }
+      return msg.role + ':' + content;
+    });
+    prompt = newMessages.join('\n');
+    prompt = prompt + '\nAssistant:';
+  } else {
+    // raw concatenation: only the system message gets a marker
+    const newMessages = messages.map((msg) => {
+      const content = GetFileContentForTextAPI(msg);
+      if (msg.role === 'system') {
+        return content + '\n***';
+      }
+      return content;
+    });
+    prompt = newMessages.join('');
+  }
+  return prompt;
+}
+
 /**
  * recommended for DeepSeek-R1, filter out content between <think> and </think> tags
  */
```
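A worked example of the prompts `normalizeMsgsForTextAPI` produces for a short conversation, computed by hand from the code above (assumes the function is in scope and that these literals satisfy the `APIMessage` type):

```ts
const messages = [
  { role: 'system', content: 'You are a helpful assistant.' },
  { role: 'user', content: 'Hi' },
  { role: 'assistant', content: 'Hello!' },
  { role: 'user', content: 'How are you?' },
];

// append === true (prefix_role enabled): roles are prefixed and the prompt
// ends with an open assistant turn for the model to continue.
normalizeMsgsForTextAPI(messages, true);
// => 'You are a helpful assistant.\n***\nUser:Hi\nAssistant:Hello!\nUser:How are you?\nAssistant:'

// append === false: plain concatenation, only the system message is marked.
normalizeMsgsForTextAPI(messages, false);
// => 'You are a helpful assistant.\n***HiHello!How are you?'
```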