diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp
index 3acba5d0..b2f5c91f 100644
--- a/common/chat-parser.cpp
+++ b/common/chat-parser.cpp
@@ -82,28 +82,38 @@ bool common_chat_msg_parser::try_consume_literal(const std::string & literal) {
}
bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think, const std::string & end_think) {
- auto start_pos = input_.find(start_think, pos_);
- if (start_pos == std::string::npos) {
- return false;
- }
+ auto handle_reasoning = [&](const std::string & reasoning, bool closed) { // helper: emits one reasoning span either inline in content or as reasoning_content
+ auto stripped_reasoning = string_strip(reasoning);
+ if (stripped_reasoning.empty()) {
+ return; // nothing is emitted when the span is empty after string_strip()
+ }
+ if (syntax_.reasoning_in_content) {
+ add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "<think>" : start_think);
+ add_content(stripped_reasoning);
+ if (closed) { // the end tag is only appended once the reasoning span is known to be finished
+ add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "</think>" : end_think);
+ }
+ } else {
+ add_reasoning_content(stripped_reasoning);
+ }
+ };
- auto end_pos = input_.find(end_think, start_pos + start_think.size());
- if (end_pos == std::string::npos) {
- if (is_partial_) {
- // Partial reasoning content
- auto reasoning = input_.substr(start_pos + start_think.size());
- add_reasoning_content(string_strip(reasoning));
- pos_ = input_.size();
+ if (syntax_.reasoning_format != COMMON_REASONING_FORMAT_NONE) { // reasoning is only parsed when a reasoning format is selected
+ if (syntax_.thinking_forced_open || try_consume_literal(start_think)) { // start_think is only consumed when thinking is not forced open
+ if (auto res = try_find_literal(end_think)) {
+ handle_reasoning(res->prelude, /* closed */ true);
+ consume_spaces();
+ return true;
+ }
+ auto rest = consume_rest(); // no end_think found: everything that is left is treated as reasoning
+ if (!rest.empty()) {
+ handle_reasoning(rest, /* closed */ !is_partial()); // a partial input leaves the span open (no end tag emitted)
+ }
+ // Unclosed thinking tags are accepted for now (following original llama.cpp): the rest of the input was consumed as reasoning above
return true;
}
- return false;
}
-
- // Extract reasoning content
- auto reasoning = input_.substr(start_pos + start_think.size(), end_pos - start_pos - start_think.size());
- add_reasoning_content(string_strip(reasoning));
- pos_ = end_pos + end_think.size();
- return true;
+ return false; // reasoning parsing is disabled, or start_think was not consumed
}
std::optional common_chat_msg_parser::try_find_literal_legacy(const std::string & literal) {
diff --git a/common/chat.cpp b/common/chat.cpp
index 15cfbbf0..f62c2801 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -278,6 +278,9 @@ void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
throw; // Re-throw for partial mode
}
}
+
+ // Add any remaining content (critical for responses without tool calls)
+ builder.add_content(builder.consume_rest());
}
// Parse DeepSeek R1 tools array format following original llama.cpp parse_prefixed_json_tool_call_array pattern
diff --git a/common/chat.h b/common/chat.h
index e23f84f3..5899ef1a 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -135,8 +135,18 @@ enum common_chat_format {
COMMON_CHAT_FORMAT_KIMI_K2, // Our custom format (keep last for backward compatibility)
};
+enum common_reasoning_format { // selects how try_parse_reasoning() treats thinking sections
+ COMMON_REASONING_FORMAT_NONE, // try_parse_reasoning() returns false without parsing anything for this value
+ COMMON_REASONING_FORMAT_DEEPSEEK, // when reasoning is inlined into content, the parser's built-in tag literals are used instead of the caller's start/end markers
+ COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY, // NOTE(review): not referenced in the visible hunks; presumably keeps the caller-supplied markers - confirm
+};
+
struct common_chat_syntax {
common_chat_format format = COMMON_CHAT_FORMAT_KIMI_K2;
+ common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // NONE makes try_parse_reasoning() return false without consuming input
+ // Whether reasoning text is appended to content instead of reasoning_content (e.g. for reasoning_format=deepseek in stream mode)
+ bool reasoning_in_content = false;
+ bool thinking_forced_open = false; // when true, try_parse_reasoning() reads reasoning without first consuming the opening tag
bool enable_thinking = false;
bool enable_tool_calls = true;
};
diff --git a/examples/server/function_calls.hpp b/examples/server/function_calls.hpp
index 068c5f24..92d25a0d 100644
--- a/examples/server/function_calls.hpp
+++ b/examples/server/function_calls.hpp
@@ -89,6 +89,8 @@ static ik_chat_msg parse_chat_message_incremental(const std::string& content, bo
try {
common_chat_syntax syntax;
syntax.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
+ syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+ syntax.reasoning_in_content = true; // Fix for thinking tag termination issue
syntax.enable_tool_calls = true;
common_chat_msg_parser parser(content, is_partial, syntax);
diff --git a/tests/test-function-calls.cpp b/tests/test-function-calls.cpp
index cfd560be..59af3804 100644
--- a/tests/test-function-calls.cpp
+++ b/tests/test-function-calls.cpp
@@ -3298,6 +3298,63 @@ int main() {
std::cout << "✅ PASS: Qwen3 XML tool calls -> finish_reason='tool_calls'" << std::endl;
std::cout << "🎯 All streaming finish_reason tests passed!" << std::endl;
+
+ // TDD: Test for thinking tag termination issue - Reproduce user's exact complaint
+ std::cout << std::endl;
+ std::cout << "🧠 Testing DeepSeek R1 thinking tag termination issue..." << std::endl;
+
+ // Test case: the whole response sits inside <think>...</think> tags (reported issue); both scenarios below parse this same input
+ std::string wrapped_response = "<think>This should be content but is wrapped in think tags</think>";
+
+ std::cout << "\n 1. REPRODUCING FAILURE - Without fix (reasoning_in_content=false):" << std::endl;
+
+ // First reproduce the failing behavior that user reported
+ common_chat_syntax broken_syntax;
+ broken_syntax.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
+ broken_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+ broken_syntax.reasoning_in_content = false; // This causes the reported issue
+ broken_syntax.enable_tool_calls = false;
+
+ try {
+ auto broken_msg = common_chat_parse(wrapped_response, false, broken_syntax);
+ std::cout << " Content: '" << broken_msg.content << "'" << std::endl;
+ std::cout << " Reasoning: '" << broken_msg.reasoning_content << "'" << std::endl;
+
+ if (broken_msg.content.empty() && !broken_msg.reasoning_content.empty()) {
+ std::cout << " ❌ REPRODUCED USER BUG: Content disappears (thinking tags don't terminate properly)" << std::endl;
+ std::cout << " User sees: EMPTY CONTENT - this is exactly what was reported!" << std::endl;
+ }
+ } catch (const std::exception& e) {
+ std::cout << " ❌ Exception: " << e.what() << std::endl;
+ }
+
+ std::cout << "\n 2. DEMONSTRATING FIX - With fix (reasoning_in_content=true):" << std::endl;
+
+ // Now show the fix works
+ common_chat_syntax fixed_syntax;
+ fixed_syntax.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
+ fixed_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+ fixed_syntax.reasoning_in_content = true; // Key fix: display thinking as content
+ fixed_syntax.enable_tool_calls = false;
+
+ try {
+ auto msg = common_chat_parse(wrapped_response, false, fixed_syntax);
+ std::cout << " Content: '" << msg.content << "'" << std::endl;
+ std::cout << " Reasoning: '" << msg.reasoning_content << "'" << std::endl;
+
+ if (msg.content.find("This should be content but is wrapped in think tags") != std::string::npos) {
+ std::cout << " ✅ PASS: Content properly preserved from think tags (with reasoning_in_content=true)" << std::endl;
+ std::cout << " User sees: Full content - this fixes the reported issue!" << std::endl;
+ } else if (msg.content.empty() && !msg.reasoning_content.empty()) {
+ std::cout << " ❌ FAILING TEST: Entire response treated as reasoning instead of content!" << std::endl;
+ std::cout << " Expected: Content should contain the text from within think tags" << std::endl;
+ } else {
+ std::cout << " ⚠️ PARTIAL: Some content found but may not contain expected text" << std::endl;
+ }
+ } catch (const std::exception& e) {
+ std::cout << " ❌ Exception in thinking tag test: " << e.what() << std::endl;
+ }
+
} catch (const std::exception& e) {
std::cout << std::endl;
std::cout << "❌ Test failed with exception: " << e.what() << std::endl;
@@ -3305,4 +3362,4 @@ int main() {
}
return 0;
-}
\ No newline at end of file
+}