server: enable checkpoint for recurrent models (#1310)

* server: enable checkpoint for recurrent models

create checkpoint after cancel

fix ban string and rm context during rewind

add checkpoint interval

only save recurrent cache

* save checkpoint during pp

---------

Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
firecoperana
2026-02-25 23:51:18 -06:00
committed by GitHub
parent 216f44363f
commit 3fac78c48b
11 changed files with 204 additions and 111 deletions

View File

@@ -188,8 +188,8 @@ int main(int argc, char ** argv) {
// save seq 0 and load into seq 1
{
// save kv of seq 0
std::vector<uint8_t> seq_store(llama_state_seq_get_size(ctx3, 0));
const size_t ncopy = llama_state_seq_get_data(ctx3, seq_store.data(), seq_store.size(), 0);
std::vector<uint8_t> seq_store(llama_state_seq_get_size(ctx3, 0, 0));
const size_t ncopy = llama_state_seq_get_data(ctx3, seq_store.data(), seq_store.size(), 0, 0);
if (ncopy != seq_store.size()) {
fprintf(stderr, "\n%s : seq copy data length %zd does not match expected length %zd\n", __func__, ncopy, seq_store.size());
llama_free(ctx3);
@@ -203,7 +203,7 @@ int main(int argc, char ** argv) {
fprintf(stderr, "%s : kv cache cleared\n", __func__);
// restore kv into seq 1
const size_t nset = llama_state_seq_set_data(ctx3, seq_store.data(), seq_store.size(), 1);
const size_t nset = llama_state_seq_set_data(ctx3, seq_store.data(), seq_store.size(), 1, 0);
if (nset != seq_store.size()) {
fprintf(stderr, "\n%s : seq set data length %zd does not match expected length %zd\n", __func__, nset, seq_store.size());
llama_free(ctx3);