mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-27 00:24:11 +00:00
server: enable checkpoint for recurrent models (#1310)
* server: enable checkpoint for recurrent models create checkpoint after cancel fix ban string and rm context during rewind add checkpoint interval only save recurrent cache * save checkpoint during pp --------- Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
@@ -188,8 +188,8 @@ int main(int argc, char ** argv) {
|
||||
// save seq 0 and load into seq 1
|
||||
{
|
||||
// save kv of seq 0
|
||||
std::vector<uint8_t> seq_store(llama_state_seq_get_size(ctx3, 0));
|
||||
const size_t ncopy = llama_state_seq_get_data(ctx3, seq_store.data(), seq_store.size(), 0);
|
||||
std::vector<uint8_t> seq_store(llama_state_seq_get_size(ctx3, 0, 0));
|
||||
const size_t ncopy = llama_state_seq_get_data(ctx3, seq_store.data(), seq_store.size(), 0, 0);
|
||||
if (ncopy != seq_store.size()) {
|
||||
fprintf(stderr, "\n%s : seq copy data length %zd does not match expected length %zd\n", __func__, ncopy, seq_store.size());
|
||||
llama_free(ctx3);
|
||||
@@ -203,7 +203,7 @@ int main(int argc, char ** argv) {
|
||||
fprintf(stderr, "%s : kv cache cleared\n", __func__);
|
||||
|
||||
// restore kv into seq 1
|
||||
const size_t nset = llama_state_seq_set_data(ctx3, seq_store.data(), seq_store.size(), 1);
|
||||
const size_t nset = llama_state_seq_set_data(ctx3, seq_store.data(), seq_store.size(), 1, 0);
|
||||
if (nset != seq_store.size()) {
|
||||
fprintf(stderr, "\n%s : seq set data length %zd does not match expected length %zd\n", __func__, nset, seq_store.size());
|
||||
llama_free(ctx3);
|
||||
|
||||
Reference in New Issue
Block a user