Deepseek V3.1 native tool calling support (OpenAI Style) (#771)

2026-03-13 15:30:03 +00:00 · 2025-09-13 00:51:40 -05:00
parent de97c33b40
commit a6da22beb2
7 changed files with 375 additions and 6 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1355,6 +1355,13 @@ struct server_context {
        }

        metrics.init();
+
+        // thinking is enabled if:
+        // 1. It's not explicitly disabled (i.e. reasoning_budget != 0; a budget of 0 disables it)
+        // 2. The chat template supports it
+        const bool enable_thinking = params.reasoning_budget != 0 && common_chat_templates_support_enable_thinking(chat_templates.get());
+        //LLAMA_LOG_INFO("Enable thinking? %d\n", enable_thinking);
+
        oai_parser_opt = {
            /* use_jinja             */ params.use_jinja,
            /* prefill_assistant     */ params.prefill_assistant,
@@ -1363,7 +1370,7 @@ struct server_context {
            /* common_chat_templates */ chat_templates.get(),
            /* allow_image           */  false,
            /* allow_audio           */  false,
-            /* enable_thinking       */ params.reasoning_budget != 0,
+            /* enable_thinking       */ enable_thinking,
        };
    }

--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -59,9 +59,9 @@ static T json_value(const json & body, const std::string & key, const T & defaul
    if (body.contains(key) && !body.at(key).is_null()) {
        try {
            return body.at(key);
-        } catch (NLOHMANN_JSON_NAMESPACE::detail::type_error const &) {
+        } catch (NLOHMANN_JSON_NAMESPACE::detail::type_error const& err) {
            std::stringstream ss;
-            ss << "Wrong type supplied for parameter '" << key << "'. Expected '" << json(default_value).type_name() << "', using default value.";
+            ss << "Wrong type supplied for parameter '" << key << "'. Expected '" << json(default_value).type_name() << "', using default value: "<< err.what();
            LOG_WARNING(ss.str().c_str(), body);
            return default_value;
        }
@@ -557,6 +557,18 @@ static json oaicompat_chat_params_parse(
        inputs.chat_template_kwargs[item.key()] = item.value().dump();
    }

+    // parse the "enable_thinking" kwarg to override the default value
+    auto enable_thinking_kwarg = json_value(inputs.chat_template_kwargs, "enable_thinking", std::string(""));
+    if (enable_thinking_kwarg == "true") {
+        inputs.enable_thinking = true;
+    }
+    else if (enable_thinking_kwarg == "false") {
+        inputs.enable_thinking = false;
+    }
+    else if (!enable_thinking_kwarg.empty() && enable_thinking_kwarg[0] == '"') {
+        throw std::runtime_error("invalid type for \"enable_thinking\" (expected boolean, got string)");
+    }
+
    /*"whether to prefill the assistant's response if the last message is an assistant message (default: prefill enabled)\n"
        "when this flag is set, if the last message is an assistant message then it will be treated as a full message and not prefilled\n"*/
    bool prefill_assistant_message = !inputs.messages.empty() && inputs.messages.back().role == "assistant" &&opt.prefill_assistant;
@@ -572,7 +584,7 @@ static json oaicompat_chat_params_parse(

        /* TODO: test this properly */
        inputs.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-        if ((!inputs.enable_thinking) || inputs.chat_template_kwargs.find("enable_thinking") != inputs.chat_template_kwargs.end()) {
+        if (inputs.enable_thinking) {
            throw std::runtime_error("Assistant response prefill is incompatible with enable_thinking.");
        }
        inputs.add_generation_prompt = true;