mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-30 19:31:48 +00:00
Add top n sigma sampler and other webui fix (#512)
Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
@@ -11,6 +11,7 @@
|
|||||||
|
|
||||||
// sampler types
|
// sampler types
|
||||||
enum class llama_sampler_type : char {
|
enum class llama_sampler_type : char {
|
||||||
|
DRY ='d',
|
||||||
TOP_K = 'k',
|
TOP_K = 'k',
|
||||||
TOP_P = 'p',
|
TOP_P = 'p',
|
||||||
MIN_P = 'm',
|
MIN_P = 'm',
|
||||||
@@ -53,6 +54,7 @@ typedef struct llama_sampling_params {
|
|||||||
llama_sampler_type::TYPICAL_P,
|
llama_sampler_type::TYPICAL_P,
|
||||||
llama_sampler_type::TOP_P,
|
llama_sampler_type::TOP_P,
|
||||||
llama_sampler_type::MIN_P,
|
llama_sampler_type::MIN_P,
|
||||||
|
llama_sampler_type::TOP_N_SIGMA,
|
||||||
llama_sampler_type::TEMPERATURE
|
llama_sampler_type::TEMPERATURE
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
@@ -970,6 +970,9 @@ struct server_context {
|
|||||||
slot.sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
slot.sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
||||||
slot.sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
|
slot.sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
|
||||||
slot.sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
|
slot.sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
|
||||||
|
slot.sparams.xtc_probability = json_value(data, "xtc_probability", default_sparams.xtc_probability);
|
||||||
|
slot.sparams.xtc_threshold = json_value(data, "xtc_threshold", default_sparams.xtc_threshold);
|
||||||
|
slot.sparams.top_n_sigma = json_value(data, "top_n_sigma", default_sparams.top_n_sigma);
|
||||||
slot.sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
|
slot.sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
|
||||||
slot.sparams.penalty_repeat = json_value(data, "repeat_penalty", default_sparams.penalty_repeat);
|
slot.sparams.penalty_repeat = json_value(data, "repeat_penalty", default_sparams.penalty_repeat);
|
||||||
slot.sparams.penalty_freq = json_value(data, "frequency_penalty", default_sparams.penalty_freq);
|
slot.sparams.penalty_freq = json_value(data, "frequency_penalty", default_sparams.penalty_freq);
|
||||||
@@ -1135,17 +1138,17 @@ struct server_context {
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
const auto & samplers_sequence = data.find("samplers");
|
const auto samplers = data.find("samplers");
|
||||||
if (samplers_sequence != data.end() && samplers_sequence->is_array()) {
|
if (samplers != data.end()) {
|
||||||
std::vector<std::string> sampler_names;
|
if (samplers->is_array()) {
|
||||||
for (const auto & sampler_name : *samplers_sequence) {
|
slot.sparams.samplers_sequence = llama_sampling_types_from_names(*samplers, false);
|
||||||
if (sampler_name.is_string()) {
|
}
|
||||||
sampler_names.emplace_back(sampler_name);
|
else if (samplers->is_string()) {
|
||||||
}
|
slot.sparams.samplers_sequence = llama_sampling_types_from_chars(samplers->get<std::string>());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
slot.sparams.samplers_sequence = default_sparams.samplers_sequence;
|
||||||
}
|
}
|
||||||
slot.sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
|
|
||||||
} else {
|
|
||||||
slot.sparams.samplers_sequence = default_sparams.samplers_sequence;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
8
examples/server/webui/dist/index.html
vendored
8
examples/server/webui/dist/index.html
vendored
File diff suppressed because one or more lines are too long
@@ -17,7 +17,7 @@ export const CONFIG_DEFAULT = {
|
|||||||
showThoughtInProgress: false,
|
showThoughtInProgress: false,
|
||||||
excludeThoughtOnReq: true,
|
excludeThoughtOnReq: true,
|
||||||
// make sure these default values are in sync with `common.h`
|
// make sure these default values are in sync with `common.h`
|
||||||
samplers: 'dkypmxt',
|
samplers: 'dkypmxnt',
|
||||||
temperature: 0.8,
|
temperature: 0.8,
|
||||||
dynatemp_range: 0.0,
|
dynatemp_range: 0.0,
|
||||||
dynatemp_exponent: 1.0,
|
dynatemp_exponent: 1.0,
|
||||||
@@ -26,6 +26,7 @@ export const CONFIG_DEFAULT = {
|
|||||||
min_p: 0.05,
|
min_p: 0.05,
|
||||||
xtc_probability: 0.0,
|
xtc_probability: 0.0,
|
||||||
xtc_threshold: 0.1,
|
xtc_threshold: 0.1,
|
||||||
|
top_n_sigma: 0.0,
|
||||||
typical_p: 1.0,
|
typical_p: 1.0,
|
||||||
repeat_last_n: 64,
|
repeat_last_n: 64,
|
||||||
repeat_penalty: 1.0,
|
repeat_penalty: 1.0,
|
||||||
@@ -44,7 +45,7 @@ export const CONFIG_INFO: Record<string, string> = {
|
|||||||
apiKey: 'Set the API Key if you are using --api-key option for the server.',
|
apiKey: 'Set the API Key if you are using --api-key option for the server.',
|
||||||
systemMessage: 'The starting message that defines how model should behave.',
|
systemMessage: 'The starting message that defines how model should behave.',
|
||||||
samplers:
|
samplers:
|
||||||
'The order at which samplers are applied, in simplified way. Default is "dkypmxt": dry->top_k->typ_p->top_p->min_p->xtc->temperature',
|
'The order at which samplers are applied, in simplified way. Default is "dkypmxnt": dry->top_k->typ_p->top_p->min_p->xtc->top_n_sigma->temperature',
|
||||||
temperature:
|
temperature:
|
||||||
'Controls the randomness of the generated text by affecting the probability distribution of the output tokens. Higher = more random, lower = more focused.',
|
'Controls the randomness of the generated text by affecting the probability distribution of the output tokens. Higher = more random, lower = more focused.',
|
||||||
dynatemp_range:
|
dynatemp_range:
|
||||||
@@ -60,6 +61,8 @@ export const CONFIG_INFO: Record<string, string> = {
|
|||||||
'XTC sampler cuts out top tokens; this parameter controls the chance of cutting tokens at all. 0 disables XTC.',
|
'XTC sampler cuts out top tokens; this parameter controls the chance of cutting tokens at all. 0 disables XTC.',
|
||||||
xtc_threshold:
|
xtc_threshold:
|
||||||
'XTC sampler cuts out top tokens; this parameter controls the token probability that is required to cut that token.',
|
'XTC sampler cuts out top tokens; this parameter controls the token probability that is required to cut that token.',
|
||||||
|
top_n_sigma:
|
||||||
|
'Top-n-sigma sampling filters out low-value tokens by discarding tokens whose logit falls more than n standard deviations below the maximum logit.',
|
||||||
typical_p:
|
typical_p:
|
||||||
'Sorts and limits tokens based on the difference between log-probability and entropy.',
|
'Sorts and limits tokens based on the difference between log-probability and entropy.',
|
||||||
repeat_last_n: 'Last n tokens to consider for penalizing repetition',
|
repeat_last_n: 'Last n tokens to consider for penalizing repetition',
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ export default function Header() {
|
|||||||
</svg>
|
</svg>
|
||||||
</label>
|
</label>
|
||||||
|
|
||||||
<div className="grow text-2xl font-bold ml-2">llama.cpp</div>
|
<div className="grow text-2xl font-bold ml-2">ik_llama.cpp</div>
|
||||||
|
|
||||||
{/* action buttons (top right) */}
|
{/* action buttons (top right) */}
|
||||||
<div className="flex items-center">
|
<div className="flex items-center">
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ const SAMPLER_KEYS: SettKey[] = [
|
|||||||
'typical_p',
|
'typical_p',
|
||||||
'xtc_probability',
|
'xtc_probability',
|
||||||
'xtc_threshold',
|
'xtc_threshold',
|
||||||
|
'top_n_sigma'
|
||||||
];
|
];
|
||||||
const PENALTY_KEYS: SettKey[] = [
|
const PENALTY_KEYS: SettKey[] = [
|
||||||
'repeat_last_n',
|
'repeat_last_n',
|
||||||
@@ -196,7 +197,7 @@ const SETTING_SECTIONS: SettingSection[] = [
|
|||||||
label: (
|
label: (
|
||||||
<>
|
<>
|
||||||
Custom JSON config (For more info, refer to{' '}
|
Custom JSON config (For more info, refer to{' '}
|
||||||
<OpenInNewTab href="https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md">
|
<OpenInNewTab href="https://github.com/ikawrakow/ik_llama.cpp/tree/main/examples/server/README.md">
|
||||||
server documentation
|
server documentation
|
||||||
</OpenInNewTab>
|
</OpenInNewTab>
|
||||||
)
|
)
|
||||||
@@ -224,7 +225,7 @@ const SETTING_SECTIONS: SettingSection[] = [
|
|||||||
<br />
|
<br />
|
||||||
<br />
|
<br />
|
||||||
If you encounter any problems, create a{' '}
|
If you encounter any problems, create a{' '}
|
||||||
<OpenInNewTab href="https://github.com/ggerganov/llama.cpp/issues/new?template=019-bug-misc.yml">
|
<OpenInNewTab href="https://github.com/ikawrakow/ik_llama.cpp/issues/new?template=019-bug-misc.yml">
|
||||||
Bug (misc.)
|
Bug (misc.)
|
||||||
</OpenInNewTab>{' '}
|
</OpenInNewTab>{' '}
|
||||||
report on Github. Please also specify <b>webui/experimental</b> on
|
report on Github. Please also specify <b>webui/experimental</b> on
|
||||||
@@ -456,11 +457,11 @@ function SettingsModalLongInput({
|
|||||||
<div className="label inline">{label || configKey}</div>
|
<div className="label inline">{label || configKey}</div>
|
||||||
<textarea
|
<textarea
|
||||||
className="textarea textarea-bordered h-24"
|
className="textarea textarea-bordered h-24"
|
||||||
placeholder={`Default: ${CONFIG_DEFAULT[configKey] || 'none'}`}
|
placeholder={`Default: ${CONFIG_DEFAULT[configKey] || 'none'}`}
|
||||||
value={value}
|
value={value}
|
||||||
onChange={(e) => onChange(e.target.value)}
|
onChange={(e) => onChange(e.target.value)}
|
||||||
/>
|
/>
|
||||||
</label>
|
</label>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -485,7 +486,7 @@ function SettingsModalShortInput({
|
|||||||
<div className="block md:hidden mb-1">
|
<div className="block md:hidden mb-1">
|
||||||
<b>{label || configKey}</b>
|
<b>{label || configKey}</b>
|
||||||
<br />
|
<br />
|
||||||
<p className="text-xs">{helpMsg}</p>
|
<p className="text-xs whitespace-normal">{helpMsg}</p>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
<label className="input input-bordered join-item grow flex items-center gap-2 mb-2">
|
<label className="input input-bordered join-item grow flex items-center gap-2 mb-2">
|
||||||
@@ -494,7 +495,7 @@ function SettingsModalShortInput({
|
|||||||
{label || configKey}
|
{label || configKey}
|
||||||
</div>
|
</div>
|
||||||
{helpMsg && (
|
{helpMsg && (
|
||||||
<div className="dropdown-content menu bg-base-100 rounded-box z-10 w-64 p-2 shadow mt-4">
|
<div className="dropdown-content menu bg-base-100 rounded-box z-10 w-64 p-2 shadow mt-4 whitespace-normal break-words">
|
||||||
{helpMsg}
|
{helpMsg}
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|||||||
@@ -200,6 +200,7 @@ export const AppContextProvider = ({
|
|||||||
typical_p: config.typical_p,
|
typical_p: config.typical_p,
|
||||||
xtc_probability: config.xtc_probability,
|
xtc_probability: config.xtc_probability,
|
||||||
xtc_threshold: config.xtc_threshold,
|
xtc_threshold: config.xtc_threshold,
|
||||||
|
top_n_sigma: config.top_n_sigma,
|
||||||
repeat_last_n: config.repeat_last_n,
|
repeat_last_n: config.repeat_last_n,
|
||||||
repeat_penalty: config.repeat_penalty,
|
repeat_penalty: config.repeat_penalty,
|
||||||
presence_penalty: config.presence_penalty,
|
presence_penalty: config.presence_penalty,
|
||||||
|
|||||||
Reference in New Issue
Block a user