Fix Anthropic Messages API (#1136)

* server: stop processing the prompt when the client disconnects

  - Implement a generator-based API for task results (see the sketch after this list)
  - Update httplib.h to 0.27.0
  - Fix an embedding error
  - Stop prompt processing when the client disconnects

* Port upstream https://github.com/ggml-org/llama.cpp/pull/18551

* Add back Anthropic support

* Fix a merge issue caused by the GitHub web UI
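
For orientation, here is a minimal sketch of the generator-style streaming loop described in the first item above, built on cpp-httplib's chunked content provider (the library this commit updates). It is a hedged illustration, not the PR's code: next_result() and cancel_task() are hypothetical stand-ins for this server's task queue.

#include <functional>
#include <optional>
#include <string>
#include "httplib.h"

// Sketch: pull results lazily (generator-style) and stop generation
// as soon as the client disconnects, instead of computing the full reply.
void handle_stream(const httplib::Request & /*req*/, httplib::Response & res,
                   std::function<std::optional<std::string>()> next_result,
                   std::function<void()> cancel_task) {
    res.set_chunked_content_provider("text/event-stream",
        [next_result, cancel_task](size_t /*offset*/, httplib::DataSink & sink) {
            while (auto chunk = next_result()) {   // pull one result at a time
                if (!sink.is_writable()) {          // client went away
                    cancel_task();                  // stop prompt processing
                    return false;                   // abort the response
                }
                sink.write(chunk->data(), chunk->size());
            }
            sink.done();                            // normal end of stream
            return true;
        });
}

The key point is that sink.is_writable() is checked on every pull, so a disconnect is noticed mid-generation rather than after the whole prompt has been processed.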

---------

Co-authored-by: firecoperana <firecoperana>
hksdpc255
2026-01-13 17:37:29 +11:00
committed by GitHub
parent 013831bba5
commit e1c4c4a495
6 changed files with 165 additions and 28 deletions


@@ -1482,7 +1482,7 @@ void server_context::send_partial_response(server_slot& slot, completion_token_o
     res->content = tkn.text_to_send;
     res->post_sampling_probs = slot.params.post_sampling_probs;
     res->oaicompat = slot.params.oaicompat;
-    res->oaicompat_model = slot.params.oaicompat_model;
+    res->oaicompat_model = slot.task->params.oaicompat_model;
     res->oaicompat_cmpl_id = slot.params.oaicompat_cmpl_id;
     res->n_decoded = slot.n_decoded;
     res->n_prompt_tokens = slot.n_prompt_tokens;
@@ -1494,6 +1494,20 @@ void server_context::send_partial_response(server_slot& slot, completion_token_o
     };
     slot.update_chat_msg(res->oaicompat_msg_diffs);
+    res->anthropic_has_reasoning = !slot.chat_msg.reasoning_content.empty();
+    res->anthropic_thinking_block_started = slot.anthropic_thinking_block_started;
+    res->anthropic_text_block_started = slot.anthropic_text_block_started;
+    for (const auto& diff : res->oaicompat_msg_diffs) {
+        if (!diff.reasoning_content_delta.empty() && !slot.anthropic_thinking_block_started) {
+            slot.anthropic_thinking_block_started = true;
+        }
+        if (!diff.content_delta.empty() && !slot.anthropic_text_block_started) {
+            slot.anthropic_text_block_started = true;
+        }
+    }
     // populate res->probs_output
     if (slot.sparams.n_probs > 0) {
         res->probs_output = { tkn }; // copy the token probs
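
For context: the two flags added above record whether a thinking or text content block has already been opened for this slot, because the Anthropic Messages streaming API requires a content_block_start event exactly once before the first delta of each block. A hedged sketch of how a consumer of these flags might emit those events (the event shapes follow the Anthropic API; emit() is a hypothetical SSE writer, not a function in this codebase):

#include <functional>
#include <string>

// Sketch, not this PR's code: open each Anthropic content block exactly once.
struct anthropic_block_state {
    bool thinking_started = false; // mirrors slot.anthropic_thinking_block_started
    bool text_started     = false; // mirrors slot.anthropic_text_block_started
};

static void open_blocks_if_needed(anthropic_block_state & st,
                                  bool has_reasoning_delta, bool has_text_delta,
                                  const std::function<void(const std::string &)> & emit) {
    // content_block_start must precede the first content_block_delta of a block.
    if (has_reasoning_delta && !st.thinking_started) {
        emit(R"({"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}})");
        st.thinking_started = true;
    }
    if (has_text_delta && !st.text_started) {
        emit(R"({"type":"content_block_start","index":1,"content_block":{"type":"text","text":""}})");
        st.text_started = true;
    }
}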