fix adaptive p sampler rewinding too far back (#1359)

* fix adaptive p sampler rewinding too far back
* update comments
* correct default value for total_weight, more comments
* new variables/names
* update comment for n_rewind
* move null pointer check back to common_sampler_review()
* refactor weighted_sum and total_weight to vector<pair>, better boundary check in llama_review_adaptive_p_impl()
2026-05-24 22:59:14 +00:00 · 2026-03-04 07:26:25 -05:00
parent f27678d39b
commit a903409a5e
7 changed files with 75 additions and 43 deletions
--- a/examples/server/server-context.cpp
+++ b/examples/server/server-context.cpp
@@ -3332,6 +3332,7 @@ void server_context::buffer_and_check_string_ban(server_slot & slot, completion_
    bool next_token = has_next_token(result, slot);
    bool send_result = slot.token_buffer.size() >= slot.n_buffer || !next_token;
    int32_t n_rewind = 0;
+    bool sent_results = false;
    // don't restore if last time was also rewind
    if (!slot.rewind_status) {
        slot.ctx_sampling->params.logit_bias = slot.logit_bias; // restore logit bias
@@ -3343,7 +3344,6 @@ void server_context::buffer_and_check_string_ban(server_slot & slot, completion_
    if (n_rewind > 0 && (slot.rewind_count <20 || slot.rewind_count <= 2 * slot.ban_phrases.size())) {
        rewind_context(slot, n_rewind);
        slot.rewind_status = true;
-        slot.ctx_sampling->rewind_samplers = true;
    }
    else if (send_result) {
        slot.rewind_status = false;
@@ -3356,12 +3356,14 @@ void server_context::buffer_and_check_string_ban(server_slot & slot, completion_
            // send 1 token
            send_token_results(slot.token_buffer, slot, 1);
        }
-        slot.ctx_sampling->record_samplers = true;
+        sent_results = true;
    }
    else {
        // buffer the result
        slot.sampled = result.tok; // for common batch add
    }
+
+    slot.ctx_sampling->n_rewind = sent_results ? -1 : n_rewind;
 }

 void server_context::process_batch_tokens(int32_t & n_batch) {