Webui: add text completions and adaptive_p sampling (#1153)

* Webui: add text completions and adaptive_p sampling

* update description

---------

Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
firecoperana
2026-01-17 00:37:07 -06:00
committed by GitHub
parent 709e1a5375
commit ee463b079e
7 changed files with 308 additions and 143 deletions

View File

@@ -396,7 +396,7 @@ static void sampler_queue(
const float top_n_sigma = params.top_n_sigma;
const std::vector<llama_sampler_type> & samplers_sequence = params.samplers_sequence;
bool use_adaptive_p = false; // see below
for (auto sampler_type : samplers_sequence) {
switch (sampler_type) {
case llama_sampler_type::DRY : llama_sample_dry (ctx_main, ctx_sampling->smpl, &cur_p); break;
@@ -416,9 +416,14 @@ static void sampler_queue(
llama_sample_temp(ctx_main, &cur_p, temp);
}
break;
case llama_sampler_type::ADAPTIVE_P: llama_sample_adaptive_p(ctx_main, ctx_sampling->adapt_p_ctx, &cur_p); break;
case llama_sampler_type::ADAPTIVE_P: use_adaptive_p = true; break;
default : break;
}
}
if (use_adaptive_p) {
// adaptive p should be put to the last, so we ignore the order in the sampler
llama_sample_adaptive_p(ctx_main, ctx_sampling->adapt_p_ctx, &cur_p);
}
}
@@ -464,7 +469,7 @@ static llama_token llama_sampling_sample_impl(
} else if (mirostat == 2) {
llama_sample_temp(ctx_main, &cur_p, temp);
id = llama_sample_token_mirostat_v2(ctx_main, &cur_p, mirostat_tau, mirostat_eta, &ctx_sampling->mirostat_mu);
} else if (adaptive_target >= 0.0f) {
} else if (adaptive_target >= 0.0f && ctx_sampling->adapt_p_ctx!=nullptr) {
// adaptive p sampling
static thread_local std::vector<float> orig_probs;
orig_probs.resize(cur_p.size);