Implement Adaptive-P Sampler (#1100)

* initial implementation of adaptive-p sampler

* explicitly mark candidates unsorted + cleanup qualifiers

* cosmetic update

* reorg prototypes

* lockstep with mainline

* add _impl for _init + reorg

* add LLAMA_API to prototypes

* update sharpness to 10

* lockstep: rng seed

* delete llama_sampling member in llama_sampler_adaptive_p

* fix LLAMA_API return type

* lockstep: rng seed cont

* actually correct implementation

* lockstep: sorting behavior

* const -> constexpr for known constants

* add missing space

* fix softmax usage in adaptive p sampler

* cosmetic changes

* implement do-not-sort version of softmax

* simplify rng seed, add static to constexpr

* refactor: remove iface + use shared rng + use actually original probabilities

* adaptive-p: add dedicated rng back in

* fix initial max_logit + add float vector to adaptive p sampler context + stochastic sampling

* adaptive-p: fuse first softmax with transformation

* adaptive-p: implement binary search selection

* adaptive-p: update comment
This commit is contained in:
dungquixote42
2026-01-10 00:58:53 -05:00
committed by GitHub
parent dd3c3f72f2
commit 52ad1c6421
8 changed files with 226 additions and 10 deletions

View File

@@ -18,7 +18,8 @@ enum class llama_sampler_type : char {
XTC = 'x',
TOP_N_SIGMA = 'n',
TYPICAL_P = 'y',
TEMPERATURE = 't'
TEMPERATURE = 't',
ADAPTIVE_P = 'w',
};
enum common_grammar_trigger_type {
@@ -66,6 +67,8 @@ typedef struct llama_sampling_params {
float xtc_probability = 0.0f; // xtc probability
float xtc_threshold = 1.0f; // xtc threshold, disabled if > 0.5
float top_n_sigma = 0.0f; // top-n-sigma
float adaptive_target = -1.0f; // select tokens near this probability (valid range 0.0 to 1.0; <0 = disabled)
float adaptive_decay = 0.90f; // decay rate for target adaptation over time. lower values -> faster but less stable adaptation. (valid range 0.0 to 1.0; ≤0 = no adaptation)
bool penalize_nl = false; // consider newlines as a repeatable token
uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampling_context
@@ -80,7 +83,8 @@ typedef struct llama_sampling_params {
llama_sampler_type::MIN_P,
llama_sampler_type::XTC,
llama_sampler_type::TOP_N_SIGMA,
llama_sampler_type::TEMPERATURE
llama_sampler_type::TEMPERATURE,
llama_sampler_type::ADAPTIVE_P,
};
@@ -118,6 +122,8 @@ struct llama_sampling_context {
std::vector<llama_token_data> cur;
llama_sampler_dry* smpl;
llama_sampler_adaptive_p * adapt_p_ctx; // adaptive p sampler
size_t n_valid; // Number of correct top tokens with correct probabilities.
llama_token_data_array cur_p; // current candidates