mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-08 04:50:13 +00:00
fix adaptive p sampler rewinding too far back (#1359)
* fix adaptive p sampler rewinding too far back
* update comments
* correct default value for total_weight, more comments
* new variables/names
* update comment for n_rewind
* move null pointer check back to common_sampler_review()
* refactor weighted_sum and total_weight to vector<pair>, better boundary check in llama_review_adaptive_p_impl()
This commit is contained in:
@@ -3332,6 +3332,7 @@ void server_context::buffer_and_check_string_ban(server_slot & slot, completion_
|
||||
bool next_token = has_next_token(result, slot);
|
||||
bool send_result = slot.token_buffer.size() >= slot.n_buffer || !next_token;
|
||||
int32_t n_rewind = 0;
|
||||
bool sent_results = false;
|
||||
// don't restore if last time was also rewind
|
||||
if (!slot.rewind_status) {
|
||||
slot.ctx_sampling->params.logit_bias = slot.logit_bias; // restore logit bias
|
||||
@@ -3343,7 +3344,6 @@ void server_context::buffer_and_check_string_ban(server_slot & slot, completion_
|
||||
if (n_rewind > 0 && (slot.rewind_count <20 || slot.rewind_count <= 2 * slot.ban_phrases.size())) {
|
||||
rewind_context(slot, n_rewind);
|
||||
slot.rewind_status = true;
|
||||
slot.ctx_sampling->rewind_samplers = true;
|
||||
}
|
||||
else if (send_result) {
|
||||
slot.rewind_status = false;
|
||||
@@ -3356,12 +3356,14 @@ void server_context::buffer_and_check_string_ban(server_slot & slot, completion_
|
||||
// send 1 token
|
||||
send_token_results(slot.token_buffer, slot, 1);
|
||||
}
|
||||
slot.ctx_sampling->record_samplers = true;
|
||||
sent_results = true;
|
||||
}
|
||||
else {
|
||||
// buffer the result
|
||||
slot.sampled = result.tok; // for common batch add
|
||||
}
|
||||
|
||||
slot.ctx_sampling->n_rewind = sent_results ? -1 : n_rewind;
|
||||
}
|
||||
|
||||
void server_context::process_batch_tokens(int32_t & n_batch) {
|
||||
|
||||
Reference in New Issue
Block a user