Fix adaptive p sampler bug with string ban (#1287)

* adaptive p: update internal state only if not rewinding

* adaptive p: conditional update for speculative decoding

* adaptive p: refactor to rewind instead of update

* adaptive p fix: better comments

* fix rewind check

* add record to handle multi-token rewind

* better comment
This commit is contained in:
dungquixote42
2026-02-20 01:11:36 -05:00
committed by GitHub
parent b855bf92de
commit 0f411b02e2
7 changed files with 71 additions and 16 deletions

View File

@@ -3020,6 +3020,8 @@ void server_context::speculative_decoding_accept() {
} else {
buffer_and_check_string_ban(slot, result);
}
common_sampler_review(slot.ctx_sampling);
}
SLT_DBG(slot, "accepted %d/%d draft tokens, new n_tokens = %d\n", (int)ids.size() - 1, (int)slot.drafted.size(), slot.n_past);
LOG_VERBOSE("speculative decoding result", {
@@ -3135,6 +3137,7 @@ void server_context::buffer_and_check_string_ban(server_slot & slot, completion_
if (n_rewind > 0 && (slot.rewind_count <20 || slot.rewind_count <= 2 * slot.ban_phrases.size())) {
rewind_context(slot, n_rewind);
slot.rewind_status = true;
slot.ctx_sampling->rewind_samplers = true;
}
else if (send_result) {
slot.rewind_status = false;
@@ -3147,6 +3150,7 @@ void server_context::buffer_and_check_string_ban(server_slot & slot, completion_
// send 1 token
send_token_results(slot.token_buffer, slot, 1);
}
slot.ctx_sampling->record_samplers = true;
}
else {
// buffer the result
@@ -3264,6 +3268,8 @@ void server_context::process_batch_tokens(int32_t & n_batch) {
buffer_and_check_string_ban(slot, result);
}
common_sampler_review(slot.ctx_sampling);
slot.i_batch = -1;
}