Faster adaptive_p sampling (#1165)

* A hopefully more efficient adaptive_p sampling
* While at it, let's fix the formatting too
* More formatting
* Hopefully better
* This should be better
* Correctly accumulate adaptive_p sampling time
* AVX2
2026-03-11 22:40:01 +00:00 · 2026-01-19 16:03:09 +02:00
parent fa58c20c42
commit 98b30e5e81
7 changed files with 87 additions and 91 deletions
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -118,7 +118,9 @@ struct llama_sampling_context * common_sampler_init(const struct llama_vocab* vo
            }
            case llama_sampler_type::ADAPTIVE_P:
            {
-                result->adapt_p_ctx = llama_init_adaptive_p(params.adaptive_target, params.adaptive_decay, result->rng());
+                GGML_ASSERT(vocab);
+                auto n_vocab = llama_vocab_n_tokens(vocab);
+                result->adapt_p_ctx = llama_init_adaptive_p(n_vocab, params.adaptive_target, params.adaptive_decay, result->rng());
                break;
            }
            default: