server: enable checkpoint for recurrent models (#1310)

* server: enable checkpoint for recurrent models

  - create checkpoint after cancel
  - fix ban string and rm context during rewind
  - add checkpoint interval
  - only save recurrent cache

* save checkpoint during pp

---------

Co-authored-by: firecoperana <firecoperana>
2026-03-01 01:24:08 +00:00 · 2026-02-25 23:51:18 -06:00
parent 216f44363f
commit 3fac78c48b
11 changed files with 204 additions and 111 deletions
--- a/src/llama-build-context.cpp
+++ b/src/llama-build-context.cpp
@@ -142,7 +142,7 @@ ggml_cgraph * llm_build_context::build_k_shift() {
    ggml_set_input(lctx.inp_K_shift);

    for (int il = 0; il < n_layer; ++il) {
-        if ((model.arch == LLM_ARCH_QWEN3NEXT || model.arch == LLM_ARCH_QWEN35MOE) && hparams.is_recurrent(il)) {
+        if (llm_arch_is_hybrid(model.arch) && hparams.is_recurrent(il)) {
            continue;
        }
        if (kv_self.k_l[il] == nullptr) {
@@ -241,7 +241,7 @@ ggml_cgraph * llm_build_context::build_defrag(const std::vector<uint32_t> & ids)
        }

        for (int il = 0; il < n_layer; ++il) {
-            if ((model.arch == LLM_ARCH_QWEN3NEXT || model.arch == LLM_ARCH_QWEN35MOE) && hparams.is_recurrent(il)) {
+            if (llm_arch_is_hybrid(model.arch) && hparams.is_recurrent(il)) {
                continue;
            }
            if (kv_self.k_l[il] == nullptr) {