From 68431b049a6fa9350df06eb0fa1ff94bce4ac22c Mon Sep 17 00:00:00 2001 From: Joshua Jolley Date: Tue, 24 Feb 2026 07:39:38 -0700 Subject: [PATCH] server: propagate task index to response objects for batch requests (#1303) When multiple prompts are sent in a single /v1/completions request, each response needs to carry the correct index so the client can match results to their corresponding prompts. The index field was not being set on partial responses, final responses, or embedding responses, causing batch results to all report index 0. Set res->index = slot.task->index in send_partial_response, send_final_response, and send_embedding. Generated with [Devin](https://cli.devin.ai/docs) Co-authored-by: Joshua Jolley Co-authored-by: Devin --- examples/server/server-context.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/server/server-context.cpp b/examples/server/server-context.cpp index 6d79cce4..af9e1afb 100644 --- a/examples/server/server-context.cpp +++ b/examples/server/server-context.cpp @@ -1644,6 +1644,7 @@ void server_context::send_partial_response(server_slot& slot, completion_token_o res->final_result = false; res->id = slot.id_task; res->id_multi = slot.id_multi; + res->index = slot.task->index; res->error = false; res->stop = false; res->stream = slot.params.stream; @@ -1715,6 +1716,7 @@ void server_context::send_final_response(server_slot& slot) { res->final_result = true; res->id = slot.id_task; res->id_multi = slot.id_multi; + res->index = slot.task->index; res->error = false; res->stop = true; // to do: set value res->stream = slot.params.stream; @@ -1770,6 +1772,7 @@ void server_context::send_final_response(server_slot& slot) { void server_context::send_embedding(const server_slot& slot, const llama_batch& batch) { auto res = std::make_unique(); res->id = slot.task->id; + res->index = slot.task->index; res->n_tokens = slot.prompt_tokens.size(); res->oaicompat = 
slot.task->params.oaicompat;