Mirror of https://github.com/ikawrakow/ik_llama.cpp.git (synced 2026-01-26 17:20:01 +00:00)
server: bug fix for preserved_tokens not preserved in process_token (#926)
Co-authored-by: firecoperana <firecoperana>
@@ -1969,7 +1969,7 @@ struct server_context {
     bool process_token(completion_token_output & result, server_slot & slot) {
         // remember which tokens were sampled - used for repetition penalties during sampling
-        const std::string token_str = llama_token_to_piece(ctx, result.tok, params.special);
+        const std::string token_str = result.text_to_send;
         slot.sampled = result.tok;

         // search stop word and delete it
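The first hunk reuses result.text_to_send, which was already rendered with the slot's preserved_tokens taken into account, instead of detokenizing again with the global params.special flag. Re-detokenizing drops preserved special tokens whenever the server runs with special-token output disabled, because llama_token_to_piece renders special tokens as an empty piece in that mode. A minimal sketch of the failure mode (the token id and its text are illustrative assumptions, not taken from the repo):

    // Hypothetical special token, e.g. a tool-call marker the client asked to preserve.
    llama_token tok = /* id of "<tool_call>" in the model's vocab */;
    std::string with_special    = llama_token_to_piece(ctx, tok, /*special=*/true);  // "<tool_call>"
    std::string without_special = llama_token_to_piece(ctx, tok, /*special=*/false); // ""

With the fix, process_token no longer makes this rendering decision itself; it streams exactly the text computed at sampling time.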
@@ -3442,7 +3442,7 @@ struct server_context {
             completion_token_output result;

             result.tok = ids[i];
-            result.text_to_send = llama_token_to_piece(ctx, result.tok, params.special);
+            result.text_to_send = llama_token_to_piece(ctx, result.tok, accept_special_token(slot, result.tok));
             result.prob = 1.0f; // set later

             if (slot.sparams.n_probs > 0) {
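The second hunk computes text_to_send through accept_special_token, which decides per slot whether a special token should be rendered. A sketch of what such a helper plausibly looks like, assuming preserved_tokens is stored as a set on the slot's sampling params (the field layout here is an assumption modeled on upstream llama.cpp, not verified against this fork):

    // Render a special token if the server globally allows special-token output,
    // or if the client listed this token in preserved_tokens for the request.
    bool accept_special_token(server_slot & slot, llama_token token) {
        return params.special ||
               slot.sparams.preserved_tokens.find(token) != slot.sparams.preserved_tokens.end();
    }

Together the two hunks make text_to_send the single source of truth: it is rendered once, with preserved_tokens honored, and process_token reuses that string unchanged.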