From 88f98c891de8b87a0ec432a5ad7a750a26e97097 Mon Sep 17 00:00:00 2001 From: Samuel Oliveira Alves <107287165+SamuelOliveirads@users.noreply.github.com> Date: Tue, 17 Feb 2026 08:33:28 -0300 Subject: [PATCH] server: add string ban in speculative path (#1274) --- examples/server/server-context.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/examples/server/server-context.cpp b/examples/server/server-context.cpp index 43d9c0a9..81fcf060 100644 --- a/examples/server/server-context.cpp +++ b/examples/server/server-context.cpp @@ -2996,13 +2996,17 @@ void server_context::speculative_decoding_accept() { populate_token_probs(slot, result, slot.params.post_sampling_probs, params_base.special, i); } - if (!process_token(result, slot)) { - // release slot because of stop condition - send_final_response(slot); - slot.release(); - slot.print_timings(); - metrics.on_prediction(slot); - break; + if (slot.n_buffer == 0) { + if (!process_token(result, slot)) { + // release slot because of stop condition + send_final_response(slot); + slot.release(); + slot.print_timings(); + metrics.on_prediction(slot); + break; + } + } else { + buffer_and_check_string_ban(slot, result); } } SLT_DBG(slot, "accepted %d/%d draft tokens, new n_tokens = %d\n", (int)ids.size() - 1, (int)slot.drafted.size(), slot.n_past);