Add top n sigma sampler and other webui fix (#512)

Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
firecoperana
2025-06-12 00:19:26 -05:00
committed by GitHub
parent 4fc3cb4a47
commit 7b1a3eece7
8 changed files with 36 additions and 26 deletions

View File

@@ -11,6 +11,7 @@
// sampler types
enum class llama_sampler_type : char {
    DRY = 'd',
TOP_K = 'k',
TOP_P = 'p',
MIN_P = 'm',
@@ -53,6 +54,7 @@ typedef struct llama_sampling_params {
llama_sampler_type::TYPICAL_P,
llama_sampler_type::TOP_P,
llama_sampler_type::MIN_P,
llama_sampler_type::TOP_N_SIGMA,
llama_sampler_type::TEMPERATURE
};

Binary file not shown.

View File

@@ -970,6 +970,9 @@ struct server_context {
slot.sparams.temp = json_value(data, "temperature", default_sparams.temp);
slot.sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
slot.sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
slot.sparams.xtc_probability = json_value(data, "xtc_probability", default_sparams.xtc_probability);
slot.sparams.xtc_threshold = json_value(data, "xtc_threshold", default_sparams.xtc_threshold);
slot.sparams.top_n_sigma = json_value(data, "top_n_sigma", default_sparams.top_n_sigma);
slot.sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
slot.sparams.penalty_repeat = json_value(data, "repeat_penalty", default_sparams.penalty_repeat);
slot.sparams.penalty_freq = json_value(data, "frequency_penalty", default_sparams.penalty_freq);
@@ -1135,17 +1138,17 @@ struct server_context {
}
{
const auto & samplers_sequence = data.find("samplers");
if (samplers_sequence != data.end() && samplers_sequence->is_array()) {
std::vector<std::string> sampler_names;
for (const auto & sampler_name : *samplers_sequence) {
if (sampler_name.is_string()) {
sampler_names.emplace_back(sampler_name);
}
const auto samplers = data.find("samplers");
if (samplers != data.end()) {
if (samplers->is_array()) {
slot.sparams.samplers_sequence = llama_sampling_types_from_names(*samplers, false);
}
else if (samplers->is_string()) {
slot.sparams.samplers_sequence = llama_sampling_types_from_chars(samplers->get<std::string>());
}
else {
slot.sparams.samplers_sequence = default_sparams.samplers_sequence;
}
slot.sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
} else {
slot.sparams.samplers_sequence = default_sparams.samplers_sequence;
}
}

File diff suppressed because one or more lines are too long

View File

@@ -17,7 +17,7 @@ export const CONFIG_DEFAULT = {
showThoughtInProgress: false,
excludeThoughtOnReq: true,
// make sure these default values are in sync with `common.h`
samplers: 'dkypmxt',
samplers: 'dkypmxnt',
temperature: 0.8,
dynatemp_range: 0.0,
dynatemp_exponent: 1.0,
@@ -26,6 +26,7 @@ export const CONFIG_DEFAULT = {
min_p: 0.05,
xtc_probability: 0.0,
xtc_threshold: 0.1,
top_n_sigma: 0.0,
typical_p: 1.0,
repeat_last_n: 64,
repeat_penalty: 1.0,
@@ -44,7 +45,7 @@ export const CONFIG_INFO: Record<string, string> = {
apiKey: 'Set the API Key if you are using --api-key option for the server.',
systemMessage: 'The starting message that defines how model should behave.',
samplers:
'The order at which samplers are applied, in simplified way. Default is "dkypmxt": dry->top_k->typ_p->top_p->min_p->xtc->temperature',
    'The order at which samplers are applied, in simplified way. Default is "dkypmxnt": dry->top_k->typ_p->top_p->min_p->xtc->top_n_sigma->temperature',
temperature:
'Controls the randomness of the generated text by affecting the probability distribution of the output tokens. Higher = more random, lower = more focused.',
dynatemp_range:
@@ -60,6 +61,8 @@ export const CONFIG_INFO: Record<string, string> = {
'XTC sampler cuts out top tokens; this parameter controls the chance of cutting tokens at all. 0 disables XTC.',
xtc_threshold:
'XTC sampler cuts out top tokens; this parameter controls the token probability that is required to cut that token.',
  top_n_sigma:
    'Top-n-sigma sampling filters out low-value tokens by discarding tokens that fall more than n standard deviations below the maximum probability.',
typical_p:
'Sorts and limits tokens based on the difference between log-probability and entropy.',
repeat_last_n: 'Last n tokens to consider for penalizing repetition',

View File

@@ -70,7 +70,7 @@ export default function Header() {
</svg>
</label>
<div className="grow text-2xl font-bold ml-2">llama.cpp</div>
<div className="grow text-2xl font-bold ml-2">ik_llama.cpp</div>
{/* action buttons (top right) */}
<div className="flex items-center">

View File

@@ -29,6 +29,7 @@ const SAMPLER_KEYS: SettKey[] = [
'typical_p',
'xtc_probability',
'xtc_threshold',
  'top_n_sigma',
];
const PENALTY_KEYS: SettKey[] = [
'repeat_last_n',
@@ -196,7 +197,7 @@ const SETTING_SECTIONS: SettingSection[] = [
label: (
<>
Custom JSON config (For more info, refer to{' '}
<OpenInNewTab href="https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md">
<OpenInNewTab href="https://github.com/ikawrakow/ik_llama.cpp/tree/main/examples/server/README.md">
server documentation
</OpenInNewTab>
)
@@ -224,7 +225,7 @@ const SETTING_SECTIONS: SettingSection[] = [
<br />
<br />
If you encounter any problems, create a{' '}
<OpenInNewTab href="https://github.com/ggerganov/llama.cpp/issues/new?template=019-bug-misc.yml">
<OpenInNewTab href="https://github.com/ikawrakow/ik_llama.cpp/issues/new?template=019-bug-misc.yml">
Bug (misc.)
</OpenInNewTab>{' '}
report on Github. Please also specify <b>webui/experimental</b> on
@@ -456,11 +457,11 @@ function SettingsModalLongInput({
<div className="label inline">{label || configKey}</div>
<textarea
className="textarea textarea-bordered h-24"
placeholder={`Default: ${CONFIG_DEFAULT[configKey] || 'none'}`}
value={value}
onChange={(e) => onChange(e.target.value)}
/>
</label>
placeholder={`Default: ${CONFIG_DEFAULT[configKey] || 'none'}`}
value={value}
onChange={(e) => onChange(e.target.value)}
/>
</label>
);
}
@@ -485,7 +486,7 @@ function SettingsModalShortInput({
<div className="block md:hidden mb-1">
<b>{label || configKey}</b>
<br />
<p className="text-xs">{helpMsg}</p>
<p className="text-xs whitespace-normal">{helpMsg}</p>
</div>
)}
<label className="input input-bordered join-item grow flex items-center gap-2 mb-2">
@@ -494,7 +495,7 @@ function SettingsModalShortInput({
{label || configKey}
</div>
{helpMsg && (
<div className="dropdown-content menu bg-base-100 rounded-box z-10 w-64 p-2 shadow mt-4">
<div className="dropdown-content menu bg-base-100 rounded-box z-10 w-64 p-2 shadow mt-4 whitespace-normal break-words">
{helpMsg}
</div>
)}

View File

@@ -200,6 +200,7 @@ export const AppContextProvider = ({
typical_p: config.typical_p,
xtc_probability: config.xtc_probability,
xtc_threshold: config.xtc_threshold,
top_n_sigma: config.top_n_sigma,
repeat_last_n: config.repeat_last_n,
repeat_penalty: config.repeat_penalty,
presence_penalty: config.presence_penalty,