server: enable checkpoint for recurrent models (#1310)

* server: enable checkpoint for recurrent models
  - create checkpoint after cancel
  - fix ban string and rm context during rewind
  - add checkpoint interval
  - only save recurrent cache
* save checkpoint during pp

---------

Co-authored-by: firecoperana <firecoperana>
2026-05-27 08:24:30 +00:00 · 2026-02-25 23:51:18 -06:00
parent 216f44363f
commit 3fac78c48b
11 changed files with 204 additions and 111 deletions
--- a/examples/server/server-task.cpp
+++ b/examples/server/server-task.cpp
@@ -1117,7 +1117,7 @@ bool server_prompt_cache::load(server_prompt& prompt, const server_tokens& token
    if (it_best != states.end()) {
        LLAMA_LOG_INFO(" - found better prompt with f_keep = %.3f, sim = %.3f, n_keep = %d, n_discarded_prompt = %d\n", f_keep_best, sim_best, it_best->n_kept_prompt, it_best->n_discarded_prompt);
        const size_t size = it_best->data.size();
-        const size_t n = llama_state_seq_set_data(ctx, it_best->data.data(), size, id_slot);
+        const size_t n = llama_state_seq_set_data(ctx, it_best->data.data(), size, id_slot, 0);
        if (n != size) {
            LLAMA_LOG_INFO("failed to restore state with size %zu\n", size);
            return false;