fix adaptive p sampler rewinding too far back (#1359)

* fix adaptive p sampler rewinding too far back

* update comments

* correct default value for total_weight, more comments

* new variables/names

* update comment for n_rewind

* move null pointer check back to common_sampler_review()

* refactor weighted_sum and total_weight into a vector<pair>; improve the boundary check in llama_review_adaptive_p_impl()
This commit is contained in:
dungquixote42
2026-03-04 07:26:25 -05:00
committed by GitHub
parent f27678d39b
commit a903409a5e
7 changed files with 75 additions and 43 deletions

View File

@@ -3332,6 +3332,7 @@ void server_context::buffer_and_check_string_ban(server_slot & slot, completion_
bool next_token = has_next_token(result, slot);
bool send_result = slot.token_buffer.size() >= slot.n_buffer || !next_token;
int32_t n_rewind = 0;
bool sent_results = false;
// don't restore if last time was also rewind
if (!slot.rewind_status) {
slot.ctx_sampling->params.logit_bias = slot.logit_bias; // restore logit bias
@@ -3343,7 +3344,6 @@ void server_context::buffer_and_check_string_ban(server_slot & slot, completion_
if (n_rewind > 0 && (slot.rewind_count <20 || slot.rewind_count <= 2 * slot.ban_phrases.size())) {
rewind_context(slot, n_rewind);
slot.rewind_status = true;
slot.ctx_sampling->rewind_samplers = true;
}
else if (send_result) {
slot.rewind_status = false;
@@ -3356,12 +3356,14 @@ void server_context::buffer_and_check_string_ban(server_slot & slot, completion_
// send 1 token
send_token_results(slot.token_buffer, slot, 1);
}
slot.ctx_sampling->record_samplers = true;
sent_results = true;
}
else {
// buffer the result
slot.sampled = result.tok; // for common batch add
}
slot.ctx_sampling->n_rewind = sent_results ? -1 : n_rewind;
}
void server_context::process_batch_tokens(int32_t & n_batch) {