Add top n sigma sampler and other webui fix (#512)

Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
firecoperana
2025-06-12 00:19:26 -05:00
committed by GitHub
parent 4fc3cb4a47
commit 7b1a3eece7
8 changed files with 36 additions and 26 deletions

View File

@@ -17,7 +17,7 @@ export const CONFIG_DEFAULT = {
showThoughtInProgress: false,
excludeThoughtOnReq: true,
// make sure these default values are in sync with `common.h`
samplers: 'dkypmxt',
samplers: 'dkypmxnt',
temperature: 0.8,
dynatemp_range: 0.0,
dynatemp_exponent: 1.0,
@@ -26,6 +26,7 @@ export const CONFIG_DEFAULT = {
min_p: 0.05,
xtc_probability: 0.0,
xtc_threshold: 0.1,
top_n_sigma: 0.0,
typical_p: 1.0,
repeat_last_n: 64,
repeat_penalty: 1.0,
@@ -44,7 +45,7 @@ export const CONFIG_INFO: Record<string, string> = {
apiKey: 'Set the API Key if you are using --api-key option for the server.',
systemMessage: 'The starting message that defines how model should behave.',
samplers:
'The order at which samplers are applied, in simplified way. Default is "dkypmxt": dry->top_k->typ_p->top_p->min_p->xtc->temperature',
      'The order at which samplers are applied, in simplified way. Default is "dkypmxnt": dry->top_k->typ_p->top_p->min_p->xtc->top_n_sigma->temperature',
temperature:
'Controls the randomness of the generated text by affecting the probability distribution of the output tokens. Higher = more random, lower = more focused.',
dynatemp_range:
@@ -60,6 +61,8 @@ export const CONFIG_INFO: Record<string, string> = {
'XTC sampler cuts out top tokens; this parameter controls the chance of cutting tokens at all. 0 disables XTC.',
xtc_threshold:
'XTC sampler cuts out top tokens; this parameter controls the token probability that is required to cut that token.',
top_n_sigma:
    'Top-n-sigma sampling filters out low-value tokens by discarding tokens that fall more than n standard deviations below the maximum probability.',
typical_p:
'Sorts and limits tokens based on the difference between log-probability and entropy.',
repeat_last_n: 'Last n tokens to consider for penalizing repetition',