From 79185862e5ee4388fb11f84646ee8e1e9d5ebf75 Mon Sep 17 00:00:00 2001
From: firecoperana
Date: Wed, 14 Jan 2026 09:01:00 -0600
Subject: [PATCH] server: log user-cancelled decode as info, not a KV-cache-full
 error; remove unused handle_decode_result

---
 examples/server/server-context.cpp | 12 ++----------
 examples/server/server-context.h   |  2 --
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/examples/server/server-context.cpp b/examples/server/server-context.cpp
index 50ea02f9..7f827e95 100644
--- a/examples/server/server-context.cpp
+++ b/examples/server/server-context.cpp
@@ -2587,10 +2587,6 @@ void server_context::extend_context(const int32_t n_tokens) {
     }
 }
 
-void server_context::handle_decode_result(const int ret) {
-
-}
-
 void server_context::speculative_decoding_accept() {
     for (auto& slot : slots) {
         if (slot.state != SLOT_STATE_PROCESSING || slot.i_batch_dft.empty()) {
@@ -2676,15 +2672,11 @@ void server_context::process_batch_tokens(int32_t & n_batch) {
         if (ret != 0) {
             if (n_batch == 1 || ret < 0) {
                 int user_cancel = -3;
-                // if you get here, it means the KV cache is full - try increasing it via the context size
                 if (ret == user_cancel) {
-                    LOG_ERROR("Decode process is cancelled by user", {
-                        {"i", i},
-                        {"n_batch", ret},
-                        {"ret", ret},
-                    });
+                    LLAMA_LOG_INFO("Decode process is cancelled by user.\n");
                 }
                 else {
+                    // if you get here, it means the KV cache is full - try increasing it via the context size
                     LOG_ERROR("failed to decode the batch: KV cache is full - try increasing it via the context size", {
                         {"i", i},
                         {"n_batch", ret},
diff --git a/examples/server/server-context.h b/examples/server/server-context.h
index e8d99ae5..34493565 100644
--- a/examples/server/server-context.h
+++ b/examples/server/server-context.h
@@ -311,8 +311,6 @@ struct server_context {
 
     void extend_context(const int32_t n_tokens);
 
-    void handle_decode_result(const int ret);
-
     void speculative_decoding_accept();
 
     bool accept_special_token(const server_slot& slot, const llama_token token);