Gracefully fail the decode instead of crashing for kshift Deepseek error (#688)

* Gracefully fail the decode instead of crashing for kshift Deepseek error * fix formatting * minor
2026-05-11 08:30:19 +00:00 · 2025-08-13 05:12:40 -05:00
parent d99cf7cb71
commit e082df47f2
2 changed files with 13 additions and 6 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -792,7 +792,10 @@ extern "C" {
    LLAMA_API void llama_kv_cache_defrag(struct llama_context * ctx);

    // Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
-    LLAMA_API void llama_kv_cache_update(struct llama_context * ctx);
+    // A positive return value does not indicate a fatal error, but rather a warning.
+    //    0 - success
+    //    1 - Context overflow in a model where k-shift is not supported
+    LLAMA_API int32_t llama_kv_cache_update(struct llama_context * ctx);

    //
    // State / sessions