ik_llama.cpp (mirror of https://github.com/ikawrakow/ik_llama.cpp.git)
Deepseek V3.1 native tool calling support (OpenAI Style) (#771)
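For orientation, the feature targets OpenAI-style chat completion requests that include a "tools" array. Below is a minimal sketch of such a request body, built with nlohmann::json the way the server code does; the model name, tool name, and parameter schema are made-up placeholders, and posting it to llama-server's /v1/chat/completions route is assumed rather than shown in this diff.

    // Illustrative only: an OpenAI-style request body with one hypothetical tool.
    #include <cstdio>
    #include <nlohmann/json.hpp>
    using json = nlohmann::ordered_json;

    int main() {
        json tool = {
            {"type", "function"},
            {"function", {
                {"name", "get_time"},                                   // hypothetical tool name
                {"description", "Get the current time for a city"},
                {"parameters", {
                    {"type", "object"},
                    {"properties", {{"city", {{"type", "string"}}}}},
                    {"required", json::array({"city"})},
                }},
            }},
        };
        json body = {
            {"model", "deepseek-v3.1"},                                 // placeholder model name
            {"messages", json::array({{{"role", "user"}, {"content", "What time is it in Tokyo?"}}})},
            {"tools", json::array({tool})},
            {"tool_choice", "auto"},
        };
        printf("%s\n", body.dump(2).c_str());                           // body to POST to the chat completions route
        return 0;
    }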
common/chat.cpp (154 lines changed)
@@ -165,6 +165,19 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
     throw std::runtime_error("Invalid tool_choice: " + tool_choice);
 }
 
+bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
+    common_chat_templates_inputs dummy_inputs;
+    common_chat_msg msg;
+    msg.role = "user";
+    msg.content = "test";
+    dummy_inputs.messages = {msg};
+    dummy_inputs.enable_thinking = false;
+    const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
+    dummy_inputs.enable_thinking = true;
+    const auto rendered_with_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
+    return rendered_no_thinking.prompt != rendered_with_thinking.prompt;
+}
+
 template <>
 std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messages) {
     std::vector<common_chat_msg> msgs;
@@ -619,6 +632,7 @@ const char * common_chat_format_name(common_chat_format format) {
         case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
         case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
         case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
+        case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1";
         case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
         case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
         case COMMON_CHAT_FORMAT_GRANITE: return "Granite";
@@ -687,11 +701,13 @@ static void parse_json_tool_calls(
     size_t from = std::string::npos;
     auto first = true;
     while (true) {
+        auto start_pos = builder.pos();
         auto res = function_regex_start_only && first
             ? builder.try_consume_regex(*function_regex_start_only)
             : function_regex
                 ? builder.try_find_regex(*function_regex, from)
                 : std::nullopt;
+
         if (res) {
             std::string name;
             if (get_function_name) {
@@ -726,6 +742,8 @@ static void parse_json_tool_calls(
                 return;
             }
             throw common_chat_msg_partial_exception("incomplete tool call");
+        } else {
+            builder.move_to(start_pos);
         }
         break;
     }
@@ -1316,6 +1334,71 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
     }
     return data;
 }
 
+static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
+    common_chat_params data;
+
+    // Pass thinking context for DeepSeek V3.1 template
+    json additional_context = {
+        {"thinking", inputs.enable_thinking},
+    };
+
+    auto prompt = apply(tmpl, inputs,
+                        /* messages_override= */ inputs.messages,
+                        /* tools_override= */ std::nullopt,
+                        additional_context);
+    data.prompt = prompt;
+    data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
+    if (string_ends_with(data.prompt, "<think>")) {
+        if (!inputs.enable_thinking) {
+            data.prompt += "</think>";
+        } else {
+            data.thinking_forced_open = true;
+        }
+    }
+    if (inputs.tools.is_array() && !inputs.tools.empty()) {
+        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            std::vector<std::string> tool_rules;
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                std::string name = function.at("name");
+                auto parameters = function.at("parameters");
+                builder.resolve_refs(parameters);
+                tool_rules.push_back(builder.add_rule(name + "-call",
+                    "( \"<|tool▁call▁begin|>\" )? \"" + name + "<|tool▁sep|>"
+                    "\" " + builder.add_schema(name + "-args", parameters) + " "
+                    "\"<|tool▁call▁end|>\""));
+            });
+            // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
+            // so we accept common variants (then it's all constrained)
+            builder.add_rule("root",
+                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
+                "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) "
+                "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
+                "\"<|tool▁calls▁end|>\""
+                " space");
+            data.grammar_triggers.push_back({
+                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
+                // If thinking_forced_open, then we capture the </think> tag in the grammar,
+                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
+                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
+                "(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*"
+            });
+            data.preserved_tokens = {
+                "<think>",
+                "</think>",
+                "<|tool▁calls▁begin|>",
+                "<|tool▁call▁begin|>",
+                "<|tool▁sep|>",
+                "<|tool▁call▁end|>",
+                "<|tool▁calls▁end|>",
+            };
+        });
+    }
+    return data;
+}
+
 static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
     builder.try_parse_reasoning("<think>", "</think>");
     if (!builder.syntax().parse_tool_calls) {
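To make the constrained output concrete, here is a rough GBNF sketch of what the rules above produce for a single hypothetical tool named get_time. It is simplified: the real root rule also accepts the misspelled opening-tag variants, only emits the leading "</think>" alternative when thinking_forced_open is true, repeats the call rule when parallel_tool_calls is set, and derives get_time-args from the tool's JSON schema.

    root ::= ( "</think>" space )? ( "<|tool▁calls▁begin|>" | "<|tool▁calls|>" | ... ) ( get_time-call ) "<|tool▁calls▁end|>" space
    get_time-call ::= ( "<|tool▁call▁begin|>" )? "get_time<|tool▁sep|>" get_time-args "<|tool▁call▁end|>"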
@@ -1337,6 +1420,66 @@ static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
         tool_calls_end);
 }
+
+static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
+    static const common_regex function_regex("(?:<|tool▁call▁begin|>)?([^\\n<]+)(?:<|tool▁sep|>)");
+
+    static const common_regex close_regex("(?:[\\s]*)?<|tool▁call▁end|>");
+    static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
+    static const common_regex tool_calls_end("<|tool▁calls▁end|>");
+
+    if (!builder.syntax().parse_tool_calls) {
+        LOG("%s: not parse_tool_calls\n", __func__);
+        builder.add_content(builder.consume_rest());
+        return;
+    }
+
+    LOG("%s: parse_tool_calls\n", __func__);
+
+    parse_json_tool_calls(
+        builder,
+        /* block_open= */ tool_calls_begin,
+        /* function_regex_start_only= */ std::nullopt,
+        function_regex,
+        close_regex,
+        tool_calls_end);
+}
+
+static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
+    // DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
+    // First try to parse using the standard reasoning parsing method
+    LOG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
+
+    auto start_pos = builder.pos();
+    auto found_end_think = builder.try_find_literal("</think>");
+    builder.move_to(start_pos);
+
+    if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
+        LOG("%s: no end_think, not partial, adding content\n", __func__);
+        common_chat_parse_deepseek_v3_1_content(builder);
+    } else if (builder.try_parse_reasoning("<think>", "</think>")) {
+        // If reasoning was parsed successfully, the remaining content is regular content
+        LOG("%s: parsed reasoning, adding content\n", __func__);
+        // </think><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|>
+        common_chat_parse_deepseek_v3_1_content(builder);
+    } else {
+        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
+            LOG("%s: reasoning_format none, adding content\n", __func__);
+            common_chat_parse_deepseek_v3_1_content(builder);
+            return;
+        }
+        // If no reasoning tags found, check if we should treat everything as reasoning
+        if (builder.syntax().thinking_forced_open) {
+            // If thinking is forced open but no tags found, treat everything as reasoning
+            LOG("%s: thinking_forced_open, adding reasoning content\n", __func__);
+            builder.add_reasoning_content(builder.consume_rest());
+        } else {
+            LOG("%s: no thinking_forced_open, adding content\n", __func__);
+            // <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|>
+            common_chat_parse_deepseek_v3_1_content(builder);
+        }
+    }
+}
 
 static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
     common_chat_params data;
     auto prompt = apply(tmpl, inputs);
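As a usage sketch of the parsing entry point (the syntax fields and expected results mirror the test cases added later in this diff; the raw completion string and tool name are illustrative):

    common_chat_syntax syntax = {
        /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
        /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
        /* .reasoning_in_content = */ false,
        /* .thinking_forced_open = */ true,
        /* .parse_tool_calls = */ true,
    };
    // Forced-open thinking followed by one tool call.
    auto msg = common_chat_parse(
        "REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>",
        /* is_partial= */ false, syntax);
    // msg.reasoning_content == "REASONING"
    // msg.tool_calls[0].name == "get_time", msg.tool_calls[0].arguments == "{\"city\":\"Tokyo\"}"
    // msg.content is empty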
@@ -1833,7 +1976,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
                 // If thinking_forced_open, then we capture the </think> tag in the grammar,
                 // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
                 std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") + (
-                    "(\\s*"
+                    "\\s*("
                     "(?:<tool_call>"
                     "|<function"
                     "|(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
@@ -2124,6 +2267,12 @@ static common_chat_params common_chat_templates_apply_jinja(
         }
     }
 
+    // DeepSeek V3.1: detect based on specific patterns in the template
+    if (src.find("message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos &&
+        params.json_schema.is_null()) {
+        return common_chat_params_init_deepseek_v3_1(tmpl, params);
+    }
+
     // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
     if (src.find("<|tool▁calls▁begin|>") != std::string::npos && params.json_schema.is_null()) {
         return common_chat_params_init_deepseek_r1(tmpl, params);
@@ -2286,6 +2435,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
         case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
            common_chat_parse_deepseek_r1(builder);
            break;
+        case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
+            common_chat_parse_deepseek_v3_1(builder);
+            break;
         case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
            common_chat_parse_functionary_v3_2(builder);
            break;
@@ -107,6 +107,7 @@ enum common_chat_format {
     COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
     COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
     COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
+    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
     COMMON_CHAT_FORMAT_HERMES_2_PRO,
     COMMON_CHAT_FORMAT_COMMAND_R7B,
     COMMON_CHAT_FORMAT_GRANITE,
@@ -196,6 +197,8 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_p
 
 common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
 
+bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);
+
 // Parses a JSON array of messages in OpenAI's chat completion API format.
 // T can be std::string containing JSON or nlohmann::ordered_json
 template <class T> std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const T & messages);
@@ -1355,6 +1355,13 @@ struct server_context {
         }
 
         metrics.init();
+
+        // thinking is enabled if:
+        // 1. It's not explicitly disabled (reasoning_budget == 0)
+        // 2. The chat template supports it
+        const bool enable_thinking = params.reasoning_budget != 0 && common_chat_templates_support_enable_thinking(chat_templates.get());
+        //LLAMA_LOG_INFO("Enable thinking? %d\n", enable_thinking);
+
         oai_parser_opt = {
             /* use_jinja */ params.use_jinja,
             /* prefill_assistant */ params.prefill_assistant,
@@ -1363,7 +1370,7 @@ struct server_context {
             /* common_chat_templates */ chat_templates.get(),
             /* allow_image */ false,
             /* allow_audio */ false,
-            /* enable_thinking */ params.reasoning_budget != 0,
+            /* enable_thinking */ enable_thinking,
         };
     }
 
@@ -59,9 +59,9 @@ static T json_value(const json & body, const std::string & key, const T & defaul
     if (body.contains(key) && !body.at(key).is_null()) {
         try {
             return body.at(key);
-        } catch (NLOHMANN_JSON_NAMESPACE::detail::type_error const &) {
+        } catch (NLOHMANN_JSON_NAMESPACE::detail::type_error const& err) {
            std::stringstream ss;
-            ss << "Wrong type supplied for parameter '" << key << "'. Expected '" << json(default_value).type_name() << "', using default value.";
+            ss << "Wrong type supplied for parameter '" << key << "'. Expected '" << json(default_value).type_name() << "', using default value: "<< err.what();
            LOG_WARNING(ss.str().c_str(), body);
            return default_value;
        }
@@ -557,6 +557,18 @@ static json oaicompat_chat_params_parse(
         inputs.chat_template_kwargs[item.key()] = item.value().dump();
     }
 
+    // parse the "enable_thinking" kwarg to override the default value
+    auto enable_thinking_kwarg = json_value(inputs.chat_template_kwargs, "enable_thinking", std::string(""));
+    if (enable_thinking_kwarg == "true") {
+        inputs.enable_thinking = true;
+    }
+    else if (enable_thinking_kwarg == "false") {
+        inputs.enable_thinking = false;
+    }
+    else if (!enable_thinking_kwarg.empty() && enable_thinking_kwarg[0] == '"') {
+        throw std::runtime_error("invalid type for \"enable_thinking\" (expected boolean, got string)");
+    }
+
     /*"whether to prefill the assistant's response if the last message is an assistant message (default: prefill enabled)\n"
     "when this flag is set, if the last message is an assistant message then it will be treated as a full message and not prefilled\n"*/
     bool prefill_assistant_message = !inputs.messages.empty() && inputs.messages.back().role == "assistant" &&opt.prefill_assistant;
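A note on the kwarg handling above: chat_template_kwargs values are stored via item.value().dump(), so a JSON boolean reaches this code as the bare string "true" or "false", while a JSON string keeps its quotes and trips the leading-'"' check. An accepted request fragment would look like this (illustrative):

    "chat_template_kwargs": { "enable_thinking": false }

whereas sending "enable_thinking": "false" (a string) is rejected with the runtime error above.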
@@ -572,7 +584,7 @@ static json oaicompat_chat_params_parse(
 
         /* TODO: test this properly */
         inputs.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-        if ((!inputs.enable_thinking) || inputs.chat_template_kwargs.find("enable_thinking") != inputs.chat_template_kwargs.end()) {
+        if (inputs.enable_thinking) {
             throw std::runtime_error("Assistant response prefill is incompatible with enable_thinking.");
         }
         inputs.add_generation_prompt = true;
@@ -22,4 +22,5 @@ These templates can be updated with the following commands:
 ./scripts/get_chat_template.py Qwen/QwQ-32B > models/templates/Qwen-QwQ-32B.jinja
 ./scripts/get_chat_template.py Qwen/Qwen3-0.6B > models/templates/Qwen-Qwen3-0.6B.jinja
 ./scripts/get_chat_template.py zai-org/GLM-4.5 > models/templates/zai-org-GLM-4.5.jinja
+./scripts/get_chat_template.py deepseek-ai/DeepSeek-V3.1 > models/templates/deepseek-ai-DeepSeek-V3.1.jinja
 ```
models/templates/deepseek-ai-DeepSeek-V3.1.jinja (new file, 3 lines)
@@ -0,0 +1,3 @@
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '
' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<|Assistant|></think>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{'<think>'}} {%- else %}{{'</think>'}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '</think>' in content %}{%- set content = content.split('</think>', 1)[1] -%}{%- endif %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{%- if not thinking %}{{'</think>'}}{%- else %}{{'<think>'}}{%- endif %}{% endif %}
@@ -18,14 +18,20 @@
 using json = nlohmann::ordered_json;
 
 template <class T>
-static void assert_equals(const T & expected, const T & actual) {
+static void assert_equals(const std::string_view label, const T & expected, const T & actual) {
     if (expected != actual) {
+        std::cerr << label << std::endl;
         std::cerr << "Expected: " << expected << std::endl;
         std::cerr << "Actual: " << actual << std::endl;
         std::cerr << std::flush;
         throw std::runtime_error("Test failed");
     }
 }
+
+template <class T>
+static void assert_equals(const T & expected, const T & actual) {
+    assert_equals("", expected, actual);
+}
 static void assert_equals(const char * expected, const std::string & actual) {
     return assert_equals<std::string>(expected, actual);
 }
@@ -49,6 +55,7 @@ static void assert_throws(const std::function<void()> & fn, const std::string &
 }
 
 static void test_reasoning() {
+    //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
     {
         common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, {
             /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
@@ -102,6 +109,36 @@ static void test_reasoning() {
         assert_equals("<think>Cogito</think>", builder.result().content);
         assert_equals("Ergo sum", builder.consume_rest());
     }
+    // Test DeepSeek V3.1 parsing - reasoning content followed by "</think>" and then regular content
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("deepseek_v3_1_reasoning_format_deepseek");
+        common_chat_msg_parser builder("REASONING</think>ok", /* is_partial= */ false, syntax);
+        assert_equals(variant, true, builder.try_parse_reasoning("<think>", "</think>"));
+        assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content);
+        assert_equals(variant, std::string("ok"), builder.consume_rest());
+    }
+    // Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "</think>" and then regular content
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("deepseek_v3_1_reasoning_format_none");
+        const std::string input = "REASONING</think>ok";
+        auto msg = common_chat_parse(input, false, syntax);
+        assert_equals(variant, std::string("REASONING</think>ok"), msg.content);
+        assert_equals(variant, std::string(""), msg.reasoning_content);
+    }
 }
 
 static void test_regex() {
@@ -189,6 +226,159 @@ static void test(const std::string & input, bool is_partial, const std::vector<s
     assert_equals(is_partial, js->is_partial);
     assert_equals(expected, args_paths.size() == 1 && args_paths[0].empty() ? js->value.get<std::string>() : js->value.dump());
 }
+
+static void test_deepseek_v3_1_tool_calls() {
+    //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
+    // variant: happy path for when it works as the model card says it should
+    const std::string variant("simple");
+    common_chat_syntax syntax = {
+        /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
+        /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+        /* .reasoning_in_content = */ false,
+        /* .thinking_forced_open = */ false,
+        /* .parse_tool_calls = */ true,
+    };
+    const std::string input = "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
+    auto msg = common_chat_parse(input, false, syntax);
+    assert_equals<std::size_t>(variant, 1, msg.tool_calls.size());
+    assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name);
+    // JSON arguments are dumped without spaces
+    assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments);
+    assert_equals(variant, std::string(""), msg.content);
+    assert_equals(variant, std::string(""), msg.reasoning_content);

+    // variant: simple + thinking open
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("simple_thinking");
+        const std::string in = "REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
+        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
+        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
+        assert_equals(variant, std::string(""), m.content);
+        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
+    }
+    // variant: simple + multiple tool calls
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ false,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("simple_multiple_tool_calls");
+        const std::string in = "CONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals<std::size_t>(variant, 2, m.tool_calls.size());
+        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
+        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments);
+        assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name);
+        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments);
+        assert_equals(variant, std::string("CONTENT"), m.content);
+        assert_equals(variant, std::string(""), m.reasoning_content);
+    }
+
+
+    // variant: thinking forced open + tool call in reasoning content
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("thinking_forced_open_tool_call_in_reasoning");
+        const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
+        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
+        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
+        assert_equals(variant, std::string(""), m.content);
+        assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING"), m.reasoning_content);
+    }
+
+    // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
+    // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
+    // to make tool calls in reasoning content according to the model card, but it does sometimes, so
+    // add the reasoning content as regular content and parse the tool calls.
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
+        const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals(variant, std::string("REASONING"), m.content);
+        assert_equals(variant, std::string(""), m.reasoning_content);
+        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
+        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
+        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
+    }
+
+    // variant: thinking forced open + tool call in reasoning content + no closing think + partial
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
+        const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
+        auto m = common_chat_parse(in, /* is_partial= */ true, syntax);
+        assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"), m.reasoning_content);
+        assert_equals(variant, std::string(""), m.content);
+        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
+    }
+
+    // variant: thinking not forced open + reasoning + regular content + no tool calls
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls");
+        const std::string in = "REASONING</think>CONTENT";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
+        assert_equals(variant, std::string("CONTENT"), m.content);
+        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
+    }
+    // variant: thinking not forced open + missing reasoning + no tool calls
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ false,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls");
+        const std::string in = "CONTENT";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
+        assert_equals(variant, std::string("CONTENT"), m.content);
+        assert_equals(variant, std::string(""), m.reasoning_content);
+    }
+}
+
 static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) {
     common_chat_msg_parser builder(input, parse_as_partial, {});
     auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {});
@@ -350,6 +540,7 @@ int main() {
     test_json_with_dumped_args();
     test_reasoning();
     test_regex();
+    test_deepseek_v3_1_tool_calls();
     std::cout << "All tests passed!\n";
     return 0;
 }