mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-29 10:51:51 +00:00
Fix adaptive p sampler bug with string ban (#1287)
* adaptive p: update internal state only if not rewinding * adaptive p: conditional update for speculative decoding * adaptive p: refactor to rewind instead of update * adaptive p fix: better comments * fix rewind check * add record to handle multi-token rewind * better comment
This commit is contained in:
@@ -83,6 +83,10 @@ struct llama_sampler_adaptive_p {
|
||||
|
||||
// first referenced in sample_token
|
||||
std::vector<float> cum_probs; // cumulative probability distribution
|
||||
|
||||
// recorded states for rewinding
|
||||
float recd_weighted_sum;
|
||||
float recd_total_weight;
|
||||
};
|
||||
|
||||
struct llama_sampler_adaptive_p * llama_init_adaptive_p_impl(int n_vocab,
|
||||
@@ -101,6 +105,8 @@ void llama_sample_adaptive_p_impl(
|
||||
llama_token_data_array * candidates,
|
||||
struct llama_sampler_adaptive_p * adapt_p_ctx);
|
||||
|
||||
// Review step for the adaptive-p sampler state held in `adapt_p_ctx`.
// NOTE(review): the definition is not visible here. Judging by the flag names and
// the `recd_weighted_sum` / `recd_total_weight` fields ("recorded states for
// rewinding"), `record` presumably snapshots the running state and `rewind`
// presumably restores it from that snapshot — confirm against the implementation,
// including what happens when both flags are set.
void llama_review_adaptive_p_impl(llama_sampler_adaptive_p * adapt_p_ctx, const bool record, const bool rewind);
|
||||
|
||||
|
||||
void llama_sample_repetition_penalties_impl(
|
||||
struct llama_sampling * smpl,
|
||||
|
||||
Reference in New Issue
Block a user