This seems to work

This commit is contained in:
Saood Karim
2025-08-30 04:45:31 -05:00
parent f529c3a808
commit 640f9b6485
2 changed files with 28 additions and 1 deletion

View File

@@ -2572,7 +2572,7 @@ struct server_context {
GGML_ASSERT(slot.ga_n == 1);
// reuse any previously computed tokens that are common with the new prompt
slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
slot.n_past = common_part(ctx, model, slot.cache_tokens, slot.prompt);
// push the prompt into the sampling context (do not apply grammar)
for (int i = 0; i < slot.n_past; ++i) {