mirror of https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-10 16:30:12 +00:00
fix model name missing in final response (#1250)
Co-authored-by: firecoperana <firecoperana>
@@ -1631,12 +1631,11 @@ void server_context::send_final_response(server_slot& slot) {
     res->timings = slot.get_timings();
     res->post_sampling_probs = slot.params.post_sampling_probs;
     res->oaicompat = slot.params.oaicompat;
-    res->oaicompat_model = slot.params.oaicompat_model;
     res->oaicompat_cmpl_id = slot.params.oaicompat_cmpl_id;
     res->oaicompat_msg = slot.update_chat_msg(res->oaicompat_msg_diffs);
     res->n_decoded = slot.n_decoded;
     res->n_prompt_tokens = slot.n_prompt_tokens;
-    res->oaicompat_model = slot.oaicompat_model;
+    res->oaicompat_model = slot.task->params.oaicompat_model;
     res->data = json{
         {"content", !slot.params.stream ? slot.generated_text : ""},
         {"generated_text", slot.generated_text}, // Always include full text for finish_reason logic
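Context for the `oaicompat_model` change above: OpenAI-compatible completion responses report the model name in a top-level `"model"` field, so a result object whose `oaicompat_model` was never populated surfaces to clients as an empty model name. Below is a minimal sketch of how that field typically lands in the payload, using nlohmann/json (the `json` type llama.cpp-family servers build responses with); the `FinalResult` struct and `to_oaicompat_json` helper are hypothetical stand-ins, not the server's actual types:

```cpp
#include <nlohmann/json.hpp>
#include <string>

using json = nlohmann::json;

// Hypothetical stand-in for the fields copied in send_final_response().
struct FinalResult {
    std::string oaicompat_model;   // empty before this fix
    std::string oaicompat_cmpl_id;
    std::string content;
};

json to_oaicompat_json(const FinalResult& res) {
    return json{
        {"id",     res.oaicompat_cmpl_id},
        {"object", "chat.completion"},
        {"model",  res.oaicompat_model},  // unset -> clients see "model": ""
        {"choices", json::array({
            json{{"index", 0},
                 {"message", json{{"role", "assistant"}, {"content", res.content}}},
                 {"finish_reason", "stop"}}
        })}
    };
}
```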
@@ -2590,9 +2589,9 @@ void server_context::batch_pending_prompt(const int32_t n_ubatch, const int32_t

             slot.state = SLOT_STATE_PROCESSING;
             slot.command = SLOT_COMMAND_NONE;
+            send_final_response(slot);
             slot.release();
             slot.print_timings();
-            send_final_response(slot);
             continue;
         }

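A plausible reading of why the reorder matters (the commit message does not spell it out): `slot.release()` resets per-request state on the slot, so calling `send_final_response(slot)` after the release builds the response from already-cleared fields, which is how the model name went missing. The sketch below reproduces that hazard with hypothetical stand-ins (`Slot`, `Response`, `release()`), not the server's actual API:

```cpp
#include <iostream>
#include <string>

// Hypothetical stand-ins for the server types, for illustration only.
struct Slot {
    std::string model = "my-model";
    void release() { model.clear(); }  // releasing wipes per-request state
};

struct Response {
    std::string model;
};

int main() {
    Slot a;                  // buggy order: release first, respond second
    a.release();
    Response r1{a.model};    // reads an already-cleared field
    std::cout << "after release:  '" << r1.model << "'\n";   // prints ''

    Slot b;                  // fixed order: respond while state is live
    Response r2{b.model};
    b.release();
    std::cout << "before release: '" << r2.model << "'\n";   // prints 'my-model'
}
```

The same reorder appears at all three call sites touched by this commit (batch_pending_prompt, speculative_decoding_accept, and send_token_results).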
@@ -2933,9 +2932,9 @@ void server_context::speculative_decoding_accept() {

         if (!process_token(result, slot)) {
             // release slot because of stop condition
+            send_final_response(slot);
             slot.release();
             slot.print_timings();
-            send_final_response(slot);
             metrics.on_prediction(slot);
             break;
         }
@@ -2953,7 +2952,7 @@ void server_context::speculative_decoding_accept() {

 bool server_context::accept_special_token(const server_slot& slot, const llama_token token) {
     return params_base.special || slot.sparams.preserved_tokens.find(token) != slot.sparams.preserved_tokens.end();
-};
+}


 void server_context::send_token_results(completion_token_outputs& results, server_slot& slot, int32_t n) {
@@ -2962,9 +2961,9 @@ void server_context::send_token_results(completion_token_outputs& results, server_slot& slot, int32_t n) {
         bool has_next = process_token(it, slot);
         count++;
         if (!has_next) {
+            send_final_response(slot);
             slot.release();
             slot.print_timings();
-            send_final_response(slot);
             metrics.on_prediction(slot);
             break;
         }