Fix Anthropic Messages API (#1136)

* server: stop processing the prompt when the client disconnects

  - Implement a generator-based API for task results (see the sketch after this list)
  - Update httplib.h to 0.27.0
  - Fix an embedding error
  - Stop prompt processing when the client disconnects

* Port upstream https://github.com/ggml-org/llama.cpp/pull/18551

* Add back Anthropic support

* Fix a merge issue caused by the GitHub web UI
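
For orientation, here is a minimal sketch of the generator-style streaming loop described in the first item above, built on cpp-httplib's chunked content provider (the library this commit updates). It is a hedged illustration, not the PR's code: next_result() and cancel_task() are hypothetical stand-ins for this server's task queue.

#include <functional>
#include <optional>
#include <string>
#include "httplib.h"

// Sketch: pull results lazily (generator-style) and stop generation
// as soon as the client disconnects, instead of computing the full reply.
void handle_stream(const httplib::Request & /*req*/, httplib::Response & res,
                   std::function<std::optional<std::string>()> next_result,
                   std::function<void()> cancel_task) {
    res.set_chunked_content_provider("text/event-stream",
        [next_result, cancel_task](size_t /*offset*/, httplib::DataSink & sink) {
            while (auto chunk = next_result()) {   // pull one result at a time
                if (!sink.is_writable()) {          // client went away
                    cancel_task();                  // stop prompt processing
                    return false;                   // abort the response
                }
                sink.write(chunk->data(), chunk->size());
            }
            sink.done();                            // normal end of stream
            return true;
        });
}

The key point is that sink.is_writable() is checked on every pull, so a disconnect is noticed mid-generation rather than after the whole prompt has been processed.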

---------

Co-authored-by: firecoperana <firecoperana>
hksdpc255
2026-01-13 17:37:29 +11:00
committed by GitHub
parent 013831bba5
commit e1c4c4a495
6 changed files with 165 additions and 28 deletions


@@ -1482,7 +1482,7 @@ void server_context::send_partial_response(server_slot& slot, completion_token_o
     res->content = tkn.text_to_send;
     res->post_sampling_probs = slot.params.post_sampling_probs;
     res->oaicompat = slot.params.oaicompat;
-    res->oaicompat_model = slot.params.oaicompat_model;
+    res->oaicompat_model = slot.task->params.oaicompat_model;
     res->oaicompat_cmpl_id = slot.params.oaicompat_cmpl_id;
     res->n_decoded = slot.n_decoded;
     res->n_prompt_tokens = slot.n_prompt_tokens;
@@ -1494,6 +1494,20 @@ void server_context::send_partial_response(server_slot& slot, completion_token_o
     };
     slot.update_chat_msg(res->oaicompat_msg_diffs);
+    res->anthropic_has_reasoning = !slot.chat_msg.reasoning_content.empty();
+    res->anthropic_thinking_block_started = slot.anthropic_thinking_block_started;
+    res->anthropic_text_block_started = slot.anthropic_text_block_started;
+    for (const auto& diff : res->oaicompat_msg_diffs) {
+        if (!diff.reasoning_content_delta.empty() && !slot.anthropic_thinking_block_started) {
+            slot.anthropic_thinking_block_started = true;
+        }
+        if (!diff.content_delta.empty() && !slot.anthropic_text_block_started) {
+            slot.anthropic_text_block_started = true;
+        }
+    }
     // populate res->probs_output
     if (slot.sparams.n_probs > 0) {
         res->probs_output = { tkn }; // copy the token probs
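
For context: the two flags added above record whether a thinking or text content block has already been opened for this slot, because the Anthropic Messages streaming API requires a content_block_start event exactly once before the first delta of each block. A hedged sketch of how a consumer of these flags might emit those events (the event shapes follow the Anthropic API; emit() is a hypothetical SSE writer, not a function in this codebase):

#include <functional>
#include <string>

// Sketch, not this PR's code: open each Anthropic content block exactly once.
struct anthropic_block_state {
    bool thinking_started = false; // mirrors slot.anthropic_thinking_block_started
    bool text_started     = false; // mirrors slot.anthropic_text_block_started
};

static void open_blocks_if_needed(anthropic_block_state & st,
                                  bool has_reasoning_delta, bool has_text_delta,
                                  const std::function<void(const std::string &)> & emit) {
    // content_block_start must precede the first content_block_delta of a block.
    if (has_reasoning_delta && !st.thinking_started) {
        emit(R"({"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}})");
        st.thinking_started = true;
    }
    if (has_text_delta && !st.text_started) {
        emit(R"({"type":"content_block_start","index":1,"content_block":{"type":"text","text":""}})");
        st.text_started = true;
    }
}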