Fix adaptive p sampler bug with string ban (#1287)

* adaptive p: update internal state only if not rewinding

* adaptive p: conditional update for speculative decoding

* adaptive p: refactor to rewind instead of update

* adaptive p fix: better comments

* fix rewind check

* add record to handle multi-token rewind

* better comment
This commit is contained in:
dungquixote42
2026-02-20 01:11:36 -05:00
committed by GitHub
parent b855bf92de
commit 0f411b02e2
7 changed files with 71 additions and 16 deletions

View File

@@ -83,6 +83,10 @@ struct llama_sampler_adaptive_p {
// first referenced in sample_token
std::vector<float> cum_probs; // cumulative probability distribution
// recorded states for rewinding
float recd_weighted_sum;
float recd_total_weight;
};
struct llama_sampler_adaptive_p * llama_init_adaptive_p_impl(int n_vocab,
@@ -101,6 +105,8 @@ void llama_sample_adaptive_p_impl(
llama_token_data_array * candidates,
struct llama_sampler_adaptive_p * adapt_p_ctx);
// Reviews the adaptive-p sampler state held in `adapt_p_ctx`, controlled by two flags:
//   record - NOTE(review): presumably snapshots the current state into the recd_* fields — confirm against the definition
//   rewind - NOTE(review): presumably restores the state from the recd_* fields — confirm against the definition
// The implementation is not visible in this header; behavior when both flags are set is unverified here.
void llama_review_adaptive_p_impl(llama_sampler_adaptive_p * adapt_p_ctx, const bool record, const bool rewind);
void llama_sample_repetition_penalties_impl(
struct llama_sampling * smpl,