Mirror of https://github.com/ikawrakow/ik_llama.cpp.git, synced 2026-02-20 13:14:09 +00:00
server: add /v1/responses support (#1184)
* server: add /v1/responses support
* server: fix Responses API model fallback and SSE branching
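For context: /v1/responses is OpenAI's newer completion surface. When streamed, it emits typed SSE events such as response.created, response.output_item.added, response.output_text.delta, and response.completed, and every object carries an ID with a type prefix (resp_ for the response, rs_ for reasoning items, msg_ for messages, fc_ for function calls). A minimal sketch of building one delta frame, assuming nlohmann::json as used elsewhere in this server and the event layout of OpenAI's published API (the helper name is illustrative, not from this patch):

#include <nlohmann/json.hpp>
#include <string>

using json = nlohmann::ordered_json;

// Format one SSE frame carrying a text delta for a message output item.
static std::string sse_text_delta(const std::string& msg_id, const std::string& delta) {
    json data = {
        {"type",          "response.output_text.delta"},
        {"item_id",       msg_id},   // the slot's "msg_..." id
        {"output_index",  0},
        {"content_index", 0},
        {"delta",         delta},
    };
    return "event: response.output_text.delta\ndata: " + data.dump() + "\n\n";
}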
@@ -337,6 +337,13 @@ void server_slot::reset() {
     json_schema = json();
     generated_tool_call_ids.clear();
 
+    oai_resp_thinking_block_started = false;
+    oai_resp_text_block_started = false;
+    oai_resp_id.clear();
+    oai_resp_reasoning_id.clear();
+    oai_resp_message_id.clear();
+    oai_resp_fc_id.clear();
+
     task.reset();
 }
 
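Slots are reused across requests, so reset() has to wipe both the "block already opened" flags and the per-request object IDs; otherwise a later request could stream deltas that reference the previous request's IDs. The fields being cleared would be declared on server_slot roughly as follows (names are taken from the diff; the grouping and comments are illustrative):

#include <string>

// Members of server_slot: per-request state for the OpenAI Responses API (sketch).
bool        oai_resp_thinking_block_started = false; // reasoning item opened yet?
bool        oai_resp_text_block_started     = false; // message/text item opened yet?
std::string oai_resp_id;           // "resp_..." id of the response object
std::string oai_resp_reasoning_id; // "rs_..."   id of the reasoning output item
std::string oai_resp_message_id;   // "msg_..."  id of the message output item
std::string oai_resp_fc_id;        // id of the in-flight tool call, copied from the parsed diff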
@@ -791,7 +798,7 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task)
     const llama_vocab* vocab = llama_model_get_vocab(model);
     if (data.count("__oaicompat") != 0) {
         slot.oaicompat = true;
-        slot.oaicompat_model = json_value(data, "model", std::string(DEFAULT_OAICOMPAT_MODEL));
+        slot.oaicompat_model = task.params.oaicompat_model;
     }
     else {
         slot.oaicompat = false;
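This one-line change is the "model fallback" fix from the commit message: rather than re-reading "model" from the raw request JSON and defaulting to DEFAULT_OAICOMPAT_MODEL, the slot now takes the name that was already resolved when the task was parsed, so /v1/responses reports the same model string as the other OAI-compatible endpoints. For reference, json_value is the server's defaulting getter, shaped roughly like this upstream (a sketch; the fork's copy may differ, e.g. by logging on type errors):

#include <nlohmann/json.hpp>
#include <string>

using json = nlohmann::json;

template <typename T>
static T json_value(const json& body, const std::string& key, const T& default_value) {
    // Return body[key] when present and convertible, otherwise the default.
    if (body.contains(key) && !body.at(key).is_null()) {
        try {
            return body.at(key).template get<T>();
        } catch (const nlohmann::json::type_error&) {
            return default_value;
        }
    }
    return default_value;
}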
@@ -799,6 +806,13 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task)
     }
     slot.params.oaicompat = task.params.oaicompat;
     slot.params.oaicompat_cmpl_id = task.params.oaicompat_cmpl_id;
+
+    slot.oai_resp_thinking_block_started = false;
+    slot.oai_resp_text_block_started = false;
+    slot.oai_resp_id = "resp_" + random_string();
+    slot.oai_resp_reasoning_id = "rs_" + random_string();
+    slot.oai_resp_message_id = "msg_" + random_string();
+    slot.oai_resp_fc_id.clear();
     slot.params.timings_per_token = json_value(data, "timings_per_token", false);
     slot.params.stream = json_value(data, "stream", false);
     auto stream_opt = json_value(data, "stream_options", json::object());
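Each accepted task mints fresh IDs with the Responses API's type prefixes; only the function-call ID starts empty, since it is filled in later from the first tool-call delta. random_string() is the server's existing helper; upstream llama.cpp defines it roughly as follows (a sketch; the fork's implementation may differ):

#include <random>
#include <string>

static std::string random_string() {
    static const std::string chars("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
    std::random_device rd;
    std::mt19937 generator(rd());
    std::string result(32, ' ');
    for (int i = 0; i < 32; ++i) {
        result[i] = chars[generator() % chars.size()];
    }
    return result;
}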
@@ -1593,6 +1607,10 @@ void server_context::send_partial_response(server_slot& slot, completion_token_o
     res->oaicompat = slot.params.oaicompat;
     res->oaicompat_model = slot.task->params.oaicompat_model;
     res->oaicompat_cmpl_id = slot.params.oaicompat_cmpl_id;
+    res->oai_resp_id = slot.oai_resp_id;
+    res->oai_resp_reasoning_id = slot.oai_resp_reasoning_id;
+    res->oai_resp_message_id = slot.oai_resp_message_id;
+    res->oai_resp_fc_id = slot.oai_resp_fc_id;
     res->n_decoded = slot.n_decoded;
     res->n_prompt_tokens = slot.n_prompt_tokens;
     res->data = json{
@@ -1608,6 +1626,9 @@ void server_context::send_partial_response(server_slot& slot, completion_token_o
     res->anthropic_thinking_block_started = slot.anthropic_thinking_block_started;
     res->anthropic_text_block_started = slot.anthropic_text_block_started;
 
+    res->oai_resp_thinking_block_started = slot.oai_resp_thinking_block_started;
+    res->oai_resp_text_block_started = slot.oai_resp_text_block_started;
+
     for (const auto& diff : res->oaicompat_msg_diffs) {
         if (!diff.reasoning_content_delta.empty() && !slot.anthropic_thinking_block_started) {
             slot.anthropic_thinking_block_started = true;
@@ -1615,6 +1636,15 @@ void server_context::send_partial_response(server_slot& slot, completion_token_o
         if (!diff.content_delta.empty() && !slot.anthropic_text_block_started) {
             slot.anthropic_text_block_started = true;
         }
+        if (!diff.reasoning_content_delta.empty() && !slot.oai_resp_thinking_block_started) {
+            slot.oai_resp_thinking_block_started = true;
+        }
+        if (!diff.content_delta.empty() && !slot.oai_resp_text_block_started) {
+            slot.oai_resp_text_block_started = true;
+        }
+        if (!diff.tool_call_delta.name.empty()) {
+            slot.oai_resp_fc_id = diff.tool_call_delta.id;
+        }
     }
 
     // populate res->probs_output
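This mirrors the existing Anthropic "open a block once" pattern: the first reasoning delta flips oai_resp_thinking_block_started, the first text delta flips oai_resp_text_block_started, and a tool-call delta records its ID. Note that the flags are copied into res before this loop mutates them, so the serializer sees the pre-delta state and can tell when a chunk is the one that opens a block. A sketch of the downstream SSE branching the flags enable (event names follow OpenAI's published API; emit_sse and stream_text_delta are illustrative stand-ins, not names from this patch):

#include <nlohmann/json.hpp>
#include <iostream>
#include <string>

using json = nlohmann::ordered_json;

// Illustrative stand-in for the server's SSE writer.
static void emit_sse(const std::string& event, const json& data) {
    std::cout << "event: " << event << "\ndata: " << data.dump() << "\n\n";
}

// Open the message item exactly once, then stream the delta.
// text_block_started is the pre-delta state captured above.
static void stream_text_delta(bool text_block_started, const std::string& msg_id, const std::string& delta) {
    if (!text_block_started) {
        emit_sse("response.output_item.added", json{
            {"output_index", 0},
            {"item", {{"id", msg_id}, {"type", "message"}, {"role", "assistant"}}},
        });
    }
    emit_sse("response.output_text.delta", json{
        {"item_id", msg_id},
        {"delta",   delta},
    });
}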
@@ -1650,6 +1680,9 @@ void server_context::send_final_response(server_slot& slot) {
     res->oaicompat = slot.params.oaicompat;
     res->oaicompat_cmpl_id = slot.params.oaicompat_cmpl_id;
     res->oaicompat_msg = slot.update_chat_msg(res->oaicompat_msg_diffs);
+    res->oai_resp_id = slot.oai_resp_id;
+    res->oai_resp_reasoning_id = slot.oai_resp_reasoning_id;
+    res->oai_resp_message_id = slot.oai_resp_message_id;
     res->n_decoded = slot.n_decoded;
     res->n_prompt_tokens = slot.n_prompt_tokens;
     res->oaicompat_model = slot.task->params.oaicompat_model;
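The final result carries the same IDs so the terminal event can reference the objects opened during streaming, and n_prompt_tokens / n_decoded supply the usage counts (input_tokens / output_tokens in OpenAI's schema). A sketch of the closing payload, with field names per the published API and the helper purely illustrative:

#include <nlohmann/json.hpp>
#include <string>

using json = nlohmann::ordered_json;

// Terminal "response.completed" payload (sketch): ids and counters
// come from the slot/result at the end of generation.
static json make_response_completed(const std::string& resp_id, const std::string& model,
                                    int n_prompt_tokens, int n_decoded) {
    return json{
        {"type", "response.completed"},
        {"response", {
            {"id",     resp_id},   // "resp_..."
            {"object", "response"},
            {"status", "completed"},
            {"model",  model},
            {"usage", {
                {"input_tokens",  n_prompt_tokens},
                {"output_tokens", n_decoded},
                {"total_tokens",  n_prompt_tokens + n_decoded},
            }},
        }},
    };
}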