mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-26 16:14:10 +00:00
server: propagate task index to response objects for batch requests (#1303)
When multiple prompts are sent in a single /v1/completions request, each response needs to carry the correct index so the client can match results to their corresponding prompts. The index field was not being set on partial responses, final responses, or embedding responses, causing batch results to all report index 0. Set res->index = slot.task->index in send_partial_response, send_final_response, and send_embedding. Generated with [Devin](https://cli.devin.ai/docs) Co-authored-by: Joshua Jolley <jjolley@clearwateranalytics.com> Co-authored-by: Devin <noreply@cognition.ai>
This commit is contained in:
@@ -1644,6 +1644,7 @@ void server_context::send_partial_response(server_slot& slot, completion_token_o
     res->final_result = false;
     res->id = slot.id_task;
     res->id_multi = slot.id_multi;
+    res->index = slot.task->index;
     res->error = false;
     res->stop = false;
     res->stream = slot.params.stream;
@@ -1715,6 +1716,7 @@ void server_context::send_final_response(server_slot& slot) {
     res->final_result = true;
     res->id = slot.id_task;
     res->id_multi = slot.id_multi;
+    res->index = slot.task->index;
     res->error = false;
     res->stop = true; // to do: set value
     res->stream = slot.params.stream;
@@ -1770,6 +1772,8 @@ void server_context::send_final_response(server_slot& slot) {
 void server_context::send_embedding(const server_slot& slot, const llama_batch& batch) {
     auto res = std::make_unique<server_task_result_embd>();
     res->id = slot.task->id;
+    res->index = slot.task->index;
+    res->server_task_result::index = slot.task->index;
     res->n_tokens = slot.prompt_tokens.size();
     res->oaicompat = slot.task->params.oaicompat;
Reference in New Issue
Block a user