From 79185862e5ee4388fb11f84646ee8e1e9d5ebf75 Mon Sep 17 00:00:00 2001
From: firecoperana
Date: Wed, 14 Jan 2026 09:01:00 -0600
Subject: [PATCH] server: log user-cancelled decode as info, not a KV-cache-full
 error; remove unused handle_decode_result

---
 examples/server/server-context.cpp | 12 ++----------
 examples/server/server-context.h   |  2 --
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/examples/server/server-context.cpp b/examples/server/server-context.cpp
index 50ea02f9..7f827e95 100644
--- a/examples/server/server-context.cpp
+++ b/examples/server/server-context.cpp
@@ -2587,10 +2587,6 @@ void server_context::extend_context(const int32_t n_tokens) {
     }
 }
 
-void server_context::handle_decode_result(const int ret) {
-
-}
-
 void server_context::speculative_decoding_accept() {
     for (auto& slot : slots) {
         if (slot.state != SLOT_STATE_PROCESSING || slot.i_batch_dft.empty()) {
@@ -2676,15 +2672,11 @@ void server_context::process_batch_tokens(int32_t & n_batch) {
         if (ret != 0) {
             if (n_batch == 1 || ret < 0) {
                 int user_cancel = -3;
-                // if you get here, it means the KV cache is full - try increasing it via the context size
                 if (ret == user_cancel) {
-                    LOG_ERROR("Decode process is cancelled by user", {
-                        {"i", i},
-                        {"n_batch", ret},
-                        {"ret", ret},
-                    });
+                    LLAMA_LOG_INFO("Decode process is cancelled by user.\n");
                 }
                 else {
+                    // if you get here, it means the KV cache is full - try increasing it via the context size
                     LOG_ERROR("failed to decode the batch: KV cache is full - try increasing it via the context size", {
                         {"i", i},
                         {"n_batch", ret},
diff --git a/examples/server/server-context.h b/examples/server/server-context.h
index e8d99ae5..34493565 100644
--- a/examples/server/server-context.h
+++ b/examples/server/server-context.h
@@ -311,8 +311,6 @@ struct server_context {
 
     void extend_context(const int32_t n_tokens);
 
-    void handle_decode_result(const int ret);
-
     void speculative_decoding_accept();
 
     bool accept_special_token(const server_slot& slot, const llama_token token);