Faster adaptive_p sampling (#1165)

* A hopefully more efficient adaptive_p sampling

* While at it, let's fix the formatting too

* More formatting

* Hopefully better

* This should be better

* Correctly accumulate adaptive_p sampling time

* AVX2
This commit is contained in:
Kawrakow
2026-01-19 16:03:09 +02:00
committed by GitHub
parent fa58c20c42
commit 98b30e5e81
7 changed files with 87 additions and 91 deletions

View File

@@ -118,7 +118,9 @@ struct llama_sampling_context * common_sampler_init(const struct llama_vocab* vo
}
case llama_sampler_type::ADAPTIVE_P:
{
-result->adapt_p_ctx = llama_init_adaptive_p(params.adaptive_target, params.adaptive_decay, result->rng());
+GGML_ASSERT(vocab);
+auto n_vocab = llama_vocab_n_tokens(vocab);
+result->adapt_p_ctx = llama_init_adaptive_p(n_vocab, params.adaptive_target, params.adaptive_decay, result->rng());
break;
}
default: