server: enable checkpoint for recurrent models (#1310)

* server: enable checkpoint for recurrent models

create checkpoint after cancel

fix banned-string handling and remove context during rewind

add checkpoint interval

only save recurrent cache

* save checkpoint during prompt processing (pp)

---------

Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
firecoperana
2026-02-25 23:51:18 -06:00
committed by GitHub
parent 216f44363f
commit 3fac78c48b
11 changed files with 204 additions and 111 deletions

View File

@@ -142,7 +142,7 @@ ggml_cgraph * llm_build_context::build_k_shift() {
ggml_set_input(lctx.inp_K_shift);
for (int il = 0; il < n_layer; ++il) {
if ((model.arch == LLM_ARCH_QWEN3NEXT || model.arch == LLM_ARCH_QWEN35MOE) && hparams.is_recurrent(il)) {
if (llm_arch_is_hybrid(model.arch) && hparams.is_recurrent(il)) {
continue;
}
if (kv_self.k_l[il] == nullptr) {
@@ -241,7 +241,7 @@ ggml_cgraph * llm_build_context::build_defrag(const std::vector<uint32_t> & ids)
}
for (int il = 0; il < n_layer; ++il) {
if ((model.arch == LLM_ARCH_QWEN3NEXT || model.arch == LLM_ARCH_QWEN35MOE) && hparams.is_recurrent(il)) {
if (llm_arch_is_hybrid(model.arch) && hparams.is_recurrent(il)) {
continue;
}
if (kv_self.k_l[il] == nullptr) {