mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-22 14:14:32 +00:00
Fix adaptive p sampler bug with string ban (#1287)
* adaptive p: update internal state only if not rewinding * adaptive p: conditional update for speculative decoding * adaptive p: refactor to rewind instead of update * adaptive p fix: better comments * fix rewind check * add record to handle multi-token rewind * better comment
This commit is contained in:
@@ -3020,6 +3020,8 @@ void server_context::speculative_decoding_accept() {
|
||||
} else {
|
||||
buffer_and_check_string_ban(slot, result);
|
||||
}
|
||||
|
||||
common_sampler_review(slot.ctx_sampling);
|
||||
}
|
||||
SLT_DBG(slot, "accepted %d/%d draft tokens, new n_tokens = %d\n", (int)ids.size() - 1, (int)slot.drafted.size(), slot.n_past);
|
||||
LOG_VERBOSE("speculative decoding result", {
|
||||
@@ -3135,6 +3137,7 @@ void server_context::buffer_and_check_string_ban(server_slot & slot, completion_
|
||||
if (n_rewind > 0 && (slot.rewind_count <20 || slot.rewind_count <= 2 * slot.ban_phrases.size())) {
|
||||
rewind_context(slot, n_rewind);
|
||||
slot.rewind_status = true;
|
||||
slot.ctx_sampling->rewind_samplers = true;
|
||||
}
|
||||
else if (send_result) {
|
||||
slot.rewind_status = false;
|
||||
@@ -3147,6 +3150,7 @@ void server_context::buffer_and_check_string_ban(server_slot & slot, completion_
|
||||
// send 1 token
|
||||
send_token_results(slot.token_buffer, slot, 1);
|
||||
}
|
||||
slot.ctx_sampling->record_samplers = true;
|
||||
}
|
||||
else {
|
||||
// buffer the result
|
||||
@@ -3264,6 +3268,8 @@ void server_context::process_batch_tokens(int32_t & n_batch) {
|
||||
buffer_and_check_string_ban(slot, result);
|
||||
}
|
||||
|
||||
common_sampler_review(slot.ctx_sampling);
|
||||
|
||||
slot.i_batch = -1;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user