diff --git a/examples/server/server-context.cpp b/examples/server/server-context.cpp index 6d79cce4..af9e1afb 100644 --- a/examples/server/server-context.cpp +++ b/examples/server/server-context.cpp @@ -1644,6 +1644,7 @@ void server_context::send_partial_response(server_slot& slot, completion_token_o res->final_result = false; res->id = slot.id_task; res->id_multi = slot.id_multi; + res->index = slot.task->index; res->error = false; res->stop = false; res->stream = slot.params.stream; @@ -1715,6 +1716,7 @@ void server_context::send_final_response(server_slot& slot) { res->final_result = true; res->id = slot.id_task; res->id_multi = slot.id_multi; + res->index = slot.task->index; res->error = false; res->stop = true; // to do: set value res->stream = slot.params.stream; @@ -1770,6 +1772,8 @@ void server_context::send_final_response(server_slot& slot) { void server_context::send_embedding(const server_slot& slot, const llama_batch& batch) { auto res = std::make_unique(); res->id = slot.task->id; + res->index = slot.task->index; + res->server_task_result::index = slot.task->index; res->n_tokens = slot.prompt_tokens.size(); res->oaicompat = slot.task->params.oaicompat;