common: Generalized XML-style tool-call parsing with streaming support (#958)

* port upstream https://github.com/ggml-org/llama.cpp/pull/16932

* Add fixed chat templates.

* fix grammar when tool have no argument

* Insert additional stops for Kimi-K2

* Fix `no triggers set for lazy grammar!` for GLM4.5/4.6

* update chat.cpp

* fix grammar for GLM 4.5/4.6

* chat: Fix streaming parser for granite models (#15682)

* fix(chat): fix streaming parser for granite models

* tests: add test cases for Granite models chat parser

* common : Fix corrupted memory error on json grammar initialization (#16038)

Initalizing RESERVED_NAME in is_reserved_name() is not thread
safe and leads to corrupted memory when used from multiple threads
as can be seen in the asan trace below. This fixes the initialization
to make it thread-safe.

    #0 0x000100abd018 in std::__1::pair<std::__1::__hash_iterator<std::__1::__hash_node<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, void*>*>, bool> std::__1::__hash_table<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, std::__1::hash<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>, std::__1::equal_to<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>>::__emplace_unique_key_args<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) __hash_table:1565
    #1 0x000100ab0320 in SchemaConverter::visit(nlohmann::json_abi_v3_12_0::basic_json<nlohmann::json_abi_v3_12_0::ordered_map, std::__1::vector, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector<unsigned char, std::__1::allocator<unsigned char>>, void> const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) json-schema-to-grammar.cpp:802
    #2 0x000100aafc48 in std::__1::__function::__func<build_grammar(std::__1::function<void (common_grammar_builder const&)> const&, common_grammar_options const&)::$_2, std::__1::allocator<build_grammar(std::__1::function<void (common_grammar_builder const&)> const&, common_grammar_options const&)::$_2>, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> (std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&, nlohmann::json_abi_v3_12_0::basic_json<nlohmann::json_abi_v3_12_0::ordered_map, std::__1::vector, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector<unsigned char, std::__1::allocator<unsigned char>>, void> const&)>::operator()(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&, nlohmann::json_abi_v3_12_0::basic_json<nlohmann::json_abi_v3_12_0::ordered_map, std::__1::vector, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector<unsigned char, std::__1::allocator<unsigned char>>, void> const&) function.h:319
    #3 0x000100a2c938 in std::__1::__function::__func<common_chat_params_init_llama_3_x(minja::chat_template const&, templates_params const&, bool)::$_0::operator()(common_grammar_builder const&) const::'lambda'(nlohmann::json_abi_v3_12_0::basic_json<nlohmann::json_abi_v3_12_0::ordered_map, std::__1::vector, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector<unsigned char, std::__1::allocator<unsigned char>>, void> const&), std::__1::allocator<common_chat_params_init_llama_3_x(minja::chat_template const&, templates_params const&, bool)::$_0::operator()(common_grammar_builder const&) const::'lambda'(nlohmann::json_abi_v3_12_0::basic_json<nlohmann::json_abi_v3_12_0::ordered_map, std::__1::vector, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector<unsigned char, std::__1::allocator<unsigned char>>, void> const&)>, void (nlohmann::json_abi_v3_12_0::basic_json<nlohmann::json_abi_v3_12_0::ordered_map, std::__1::vector, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector<unsigned char, std::__1::allocator<unsigned char>>, void> const&)>::operator()(nlohmann::json_abi_v3_12_0::basic_json<nlohmann::json_abi_v3_12_0::ordered_map, std::__1::vector, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector<unsigned char, std::__1::allocator<unsigned char>>, void> const&) function.h:319
    #4 0x000100a139f8 in foreach_function(nlohmann::json_abi_v3_12_0::basic_json<nlohmann::json_abi_v3_12_0::ordered_map, std::__1::vector, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector<unsigned char, std::__1::allocator<unsigned char>>, void> const&, std::__1::function<void (nlohmann::json_abi_v3_12_0::basic_json<nlohmann::json_abi_v3_12_0::ordered_map, std::__1::vector, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector<unsigned char, std::__1::allocator<unsigned char>>, void> const&)> const&) chat.cpp:762
    #5 0x000100a2a7f4 in std::__1::__function::__func<common_chat_params_init_llama_3_x(minja::chat_template const&, templates_params const&, bool)::$_0, std::__1::allocator<common_chat_params_init_llama_3_x(minja::chat_template const&, templates_params const&, bool)::$_0>, void (common_grammar_builder const&)>::operator()(common_grammar_builder const&) function.h:319
    #6 0x000100aa98f4 in build_grammar(std::__1::function<void (common_grammar_builder const&)> const&, common_grammar_options const&) json-schema-to-grammar.cpp:982
    #7 0x0001009c9314 in common_chat_params_init_llama_3_x(minja::chat_template const&, templates_params const&, bool) chat.cpp:1110
    #8 0x0001009b8afc in common_chat_templates_apply_jinja(common_chat_templates const*, common_chat_templates_inputs const&) chat.cpp:1992
    #9 0x0001009b533c in common_chat_templates_apply(common_chat_templates const*, common_chat_templates_inputs const&) chat.cpp:2074
    #10 0x000100810120 in llamacpp_apply_chat_template+0x724 (predict_oai-98384e17fb94e863:arm64+0x100090120)
    ...

==45482==Register values:
 x[0] = 0x00006020004147f8   x[1] = 0x00006080000013c8   x[2] = 0x0000000000000000   x[3] = 0x0000604006289738
 x[4] = 0x0000000000000002   x[5] = 0x0000000000000001   x[6] = 0x04034000004b4000   x[7] = 0x0000000000000001
 x[8] = 0xbebebebebebebebe   x[9] = 0x17d7d7d7d7d7d7d7  x[10] = 0x00000c04000828ff  x[11] = 0x0000000000000001
x[12] = 0x000000002018d383  x[13] = 0x0000000000000000  x[14] = 0xfa0000000000fafa  x[15] = 0x000010700001ffff
x[16] = 0x000000019dc012c0  x[17] = 0x00000001021284f8  x[18] = 0x0000000000000000  x[19] = 0x00000001700acdc0
x[20] = 0x0000000000000002  x[21] = 0x000000002018d384  x[22] = 0x16dd16fd2e731151  x[23] = 0x0000007000020000
x[24] = 0x0000000100c69c08  x[25] = 0x0000000100c69c20  x[26] = 0x00006080000013c7  x[27] = 0x0000000100c69c00
x[28] = 0x00000001700acd60     fp = 0x00000001700aceb0     lr = 0x0000000100abce30     sp = 0x00000001700acd60
AddressSanitizer can not provide additional info.
SUMMARY: AddressSanitizer: SEGV __hash_table:1565 in std::__1::pair<std::__1::__hash_iterator<std::__1::__hash_node<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, void*>*>, bool> std::__1::__hash_table<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, std::__1::hash<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>, std::__1::equal_to<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>>>::__emplace_unique_key_args<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&)
Thread T5 created by T0 here:
    #0 0x0001020b99d4 in pthread_create+0x5c (libclang_rt.asan_osx_dynamic.dylib:arm64e+0x359d4)
    #1 0x000100873910 in std::sys::pal::unix::thread::Thread::new::h77254fdd87a28e05+0x118 (predict_oai-98384e17fb94e863:arm64+0x1000f3910)
    #2 0x0001007c7a1c in test::run_test::haeb3c2bcd5ed6cf6+0x76c (predict_oai-98384e17fb94e863:arm64+0x100047a1c)
    #3 0x0001007aedb0 in test::console::run_tests_console::he9d142d704f3a986+0x149c (predict_oai-98384e17fb94e863:arm64+0x10002edb0)
    #4 0x0001007c5758 in test::test_main::hf86a5e20735245b9+0x118 (predict_oai-98384e17fb94e863:arm64+0x100045758)
    #5 0x0001007c5da0 in test::test_main_static::h61ee9c8fd30abca0+0x54 (predict_oai-98384e17fb94e863:arm64+0x100045da0)
    ...

==45482==ABORTING

* common : fix reasoning before forced tool call via tool_choice = required (#16264)

* common : fix reasoning before forced tool call via tool_choice = required

* common : improve reasoning and commentary handling when tool_choice is required

(cherry picked from commit c746984956d6882c2de73d53ae2bb3bdf889e475)

---------

Co-authored-by: Alde Rojas <hello@alde.dev>

* Try fix Jinja template for GLM

* Improve Kimi-K2 chat template

* Fix "Invalid tool call arguments passed" in a rare case.

In a rare case, the model may emit a raw string that begins with a valid JSON string. This commit adds unit tests to cover that scenario and fixes the regression introduced during the Kimi-K2 adaptation.

---------

Co-authored-by: shun095 <8069181+shun095@users.noreply.github.com>
Co-authored-by: David Ribeiro Alves <davidralves@gmail.com>
Co-authored-by: crat0z <11581854+crat0z@users.noreply.github.com>
Co-authored-by: Alde Rojas <hello@alde.dev>
This commit is contained in:
hksdpc255
2025-11-19 01:29:58 +11:00
committed by GitHub
parent eb3ce7549b
commit da5de88073
18 changed files with 3255 additions and 70 deletions

View File

@@ -0,0 +1,106 @@
[gMASK]<sop>
{%- if tools -%}
<|system|>
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{% for tool in tools %}
{{ tool | tojson(ensure_ascii=False) }}
{% endfor %}
</tools>
For each function call, output the function name and arguments within the following XML format:
<tool_call>{function-name}
<arg_key>{arg-key-1}</arg_key>
<arg_value>{arg-value-1}</arg_value>
<arg_key>{arg-key-2}</arg_key>
<arg_value>{arg-value-2}</arg_value>
...
</tool_call>{%- endif -%}
{%- macro visible_text(content) -%}
{%- if content is string -%}
{{- content }}
{%- elif content is iterable and content is not mapping -%}
{%- for item in content -%}
{%- if item is mapping and item.type == 'text' -%}
{{- item.text }}
{%- elif item is string -%}
{{- item }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{- content }}
{%- endif -%}
{%- endmacro -%}
{%- set ns = namespace(last_user_index=-1) %}
{%- for m in messages %}
{%- if m.role == 'user' %}
{% set ns.last_user_index = loop.index0 -%}
{%- endif %}
{%- endfor %}
{% for m in messages %}
{%- if m.role == 'user' -%}<|user|>
{{ visible_text(m.content) }}
{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}}
{%- elif m.role == 'assistant' -%}
<|assistant|>
{%- set reasoning_content = '' %}
{%- set content = visible_text(m.content) %}
{%- if m.reasoning_content is string %}
{%- set reasoning_content = m.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{%- if loop.index0 > ns.last_user_index and reasoning_content -%}
{{ '\n<think>' + reasoning_content.strip() + '</think>'}}
{%- else -%}
{{ '\n<think></think>' }}
{%- endif -%}
{%- if content.strip() -%}
{{ '\n' + content.strip() }}
{%- endif -%}
{% if m.tool_calls %}
{% for tc in m.tool_calls %}
{%- if tc.function %}
{%- set tc = tc.function %}
{%- endif %}
{{ '\n<tool_call>' + tc.name }}
{% set _args = tc.arguments %}
{%- if _args is not mapping -%}
{%- set _args = {} %}
{%- endif -%}
{% for k, v in _args | items %}
<arg_key>{{ k }}</arg_key>
<arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>
{% endfor %}
</tool_call>{% endfor %}
{% endif %}
{%- elif m.role == 'tool' -%}
{%- if m.content is string -%}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|observation|>' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- m.content }}
{{- '\n</tool_response>' }}
{%- else -%}
<|observation|>{% for tr in m.content %}
<tool_response>
{{ tr.output if tr.output is defined else tr }}
</tool_response>{% endfor -%}
{% endif -%}
{%- elif m.role == 'system' -%}
<|system|>
{{ visible_text(m.content) }}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
<|assistant|>{{- '\n<think></think>' if (enable_thinking is defined and not enable_thinking) else '' -}}
{%- endif -%}

View File

@@ -0,0 +1,64 @@
{% macro render_content(msg) -%}
{%- set c = msg.get('content') -%}
{%- if c is string -%}
{{ c }}
{%- elif c is not none -%}
{% for content in c -%}
{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
<|media_start|>image<|media_content|><|media_pad|><|media_end|>
{% else -%}
{{ content['text'] }}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
{%- endmacro %}
{%- set tool_response_queue = namespace(ids=[]) -%}
{%- set tool_call_counter = namespace(value=1) -%}
{%- if tools -%}
<|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>
{%- endif -%}
{% for message in messages %}
{%- if loop.first and messages[0]['role'] != 'system' -%}
<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>
{% endif %}
{%- set role_name = message.get('name') or message['role'] -%}
{%- if message['role'] == 'user' -%}
<|im_user|>{{role_name}}<|im_middle|>
{%- elif message['role'] == 'assistant' -%}
<|im_assistant|>{{role_name}}<|im_middle|>
{%- else -%}
<|im_system|>{{role_name}}<|im_middle|>
{%- endif -%}
{%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
{{render_content(message)}}<|tool_calls_section_begin|>
{%- for tool_call in message['tool_calls'] -%}
{%- if tool_call['id'] is defined -%}
{%- set formatted_id = tool_call['id'] -%}
{%- else -%}
{%- set formatted_id = 'functions.' + tool_call['function']['name'] + ':' + (tool_call_counter.value | string) -%}
{%- set tool_call_counter.value = tool_call_counter.value + 1 -%}
{%- endif -%}
{%- set _ = tool_response_queue.ids.append(formatted_id) -%}
<|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
{%- endfor -%}
<|tool_calls_section_end|>
{%- elif message['role'] == 'tool' -%}
{%- if tool_response_queue.ids -%}
{%- set tool_call_id = tool_response_queue.ids.pop(0) -%}
{%- else -%}
{%- set tool_call_id = 'functions.' + message.get('name', 'unknown') + ':' + (tool_call_counter.value | string) -%}
{%- endif -%}
## Return of {{ tool_call_id }}
{{render_content(message)}}
{%- elif message['content'] is not none -%}
{{render_content(message)}}
{%- endif -%}
<|im_end|>
{%- endfor -%}
{%- if add_generation_prompt -%}
<|im_assistant|>assistant<|im_middle|>
{%- endif -%}

View File

@@ -0,0 +1,112 @@
{%- macro render_content(msg) -%}
{%- set c = msg.get('content') -%}
{%- if c is string -%}
{{ c }}
{%- elif c is not none -%}
{% for content in c -%}
{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
<|media_start|>image<|media_content|><|media_pad|><|media_end|>
{% else -%}
{{ content['text'] }}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
{%- endmacro -%}
{% macro set_roles(message) -%}
{%- set role_name = message.get('name') or message['role'] -%}
{%- if message['role'] == 'user' -%}
<|im_user|>{{role_name}}<|im_middle|>
{%- elif message['role'] == 'assistant' -%}
<|im_assistant|>{{role_name}}<|im_middle|>
{%- else -%}
<|im_system|>{{role_name}}<|im_middle|>
{%- endif -%}
{%- endmacro -%}
{%- set tool_response_queue = namespace(ids=[]) -%}
{%- set tool_call_counter = namespace(value=1) -%}
{%- macro render_toolcalls(message) -%}
<|tool_calls_section_begin|>
{%- for tool_call in message['tool_calls'] -%}
{%- if tool_call['id'] is defined -%}
{%- set formatted_id = tool_call['id'] -%}
{%- else -%}
{%- set formatted_id = 'functions.' + tool_call['function']['name'] + ':' + (tool_call_counter.value | string) -%}
{%- set tool_call_counter.value = tool_call_counter.value + 1 -%}
{%- endif -%}
{%- set _ = tool_response_queue.ids.append(formatted_id) -%}
<|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
{%- endfor -%}
<|tool_calls_section_end|>
{%- endmacro -%}
{# Find last non-tool-call assisitant message #}
{%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}
{%- for idx in range(messages|length-1, -1, -1) -%}
{%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}
{%- set ns.last_non_tool_call_assistant_msg = idx -%}
{%- endif -%}
{%- endfor -%}
{# split all messages into history & suffix, reasoning_content in suffix should be reserved.#}
{%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}
{%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}
{%- if tools -%}
<|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>
{%- endif -%}
{%- if messages|length == 0 or messages[0]['role'] != 'system' -%}
<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>
{%- endif -%}
{%- for message in hist_msgs -%}
{{set_roles(message)}}
{%- if message['role'] == 'assistant' -%}
<think></think>{{render_content(message)}}
{%- if message.get('tool_calls') -%}
{{render_toolcalls(message)}}
{%- endif -%}
{%- elif message['role'] == 'tool' -%}
{%- if tool_response_queue.ids -%}
{%- set tool_call_id = tool_response_queue.ids.pop(0) -%}
{%- else -%}
{%- set tool_call_id = 'functions.' + message.get('name', 'unknown') + ':' + (tool_call_counter.value | string) -%}
{%- endif -%}
## Return of {{ tool_call_id }}
{{render_content(message)}}
{%- elif message['content'] is not none -%}
{{render_content(message)}}
{%- endif -%}
<|im_end|>
{%- endfor -%}
{%- for message in suffix_msgs -%}
{{set_roles(message)}}
{%- if message['role'] == 'assistant' -%}
{%- set rc = message.get('reasoning_content', '') -%}
<think>{{rc}}</think>{{render_content(message)}}
{%- if message.get('tool_calls') -%}
{{render_toolcalls(message)}}
{%- endif -%}
{%- elif message['role'] == 'tool' -%}
{%- if tool_response_queue.ids -%}
{%- set tool_call_id = tool_response_queue.ids.pop(0) -%}
{%- else -%}
{%- set tool_call_id = 'functions.' + message.get('name', 'unknown') + ':' + (tool_call_counter.value | string) -%}
{%- endif -%}
## Return of {{ tool_call_id }}
{{render_content(message)}}
{%- elif message['content'] is not none -%}
{{render_content(message)}}
{%- endif -%}
<|im_end|>
{%- endfor -%}
{%- if add_generation_prompt -%}
<|im_assistant|>assistant<|im_middle|>
{%- endif -%}

View File

@@ -0,0 +1,54 @@
{%- if tools %}
{{- '<|im_start|>system\n' }}
{%- if messages[0]['role'] == 'system' %}
{{- messages[0]['content'] }}
{%- else %}
{{- 'You are MiMo, an AI assistant developed by Xiaomi.' }}
{%- endif %}
{{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
{%- if messages[0]['role'] == 'system' %}
{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
{%- else %}
{{- '<|im_start|>system\nYou are MiMo, an AI assistant developed by Xiaomi.<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- for message in messages %}
{%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
{%- elif message.role == "assistant" %}
{{- '<|im_start|>' + message.role }}
{%- if message.content %}
{{- '\n' + message.content }}
{%- endif %}
{%- for tool_call in message.tool_calls %}
{%- if tool_call.function is defined %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '\n<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{{- tool_call.arguments | tojson }}
{{- '}\n</tool_call>' }}
{%- endfor %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "tool" %}
{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- message.content }}
{{- '\n</tool_response>' }}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- endif %}

View File

@@ -0,0 +1,117 @@
{% macro render_extra_keys(json_dict, handled_keys) %}
{%- if json_dict is mapping %}
{%- for json_key in json_dict if json_key not in handled_keys %}
{%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}
{{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}
{%- else %}
{{-'\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}
{%- endif %}
{%- endfor %}
{%- endif %}
{% endmacro %}
{%- if messages[0]["role"] == "system" %}
{%- set system_message = messages[0]["content"] %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set loop_messages = messages %}
{%- endif %}
{%- if not tools is defined %}
{%- set tools = [] %}
{%- endif %}
{%- if system_message is defined %}
{{- "<|im_start|>system\n" + system_message }}
{%- else %}
{%- if tools is iterable and tools | length > 0 %}
{{- "<|im_start|>system\nYou are Qwen, a helpful AI assistant that can interact with a computer to solve tasks." }}
{%- endif %}
{%- endif %}
{%- if tools is iterable and tools | length > 0 %}
{{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
{{- "<tools>" }}
{%- for tool in tools %}
{%- if tool.function is defined %}
{%- set tool = tool.function %}
{%- endif %}
{{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
{%- if tool.description is defined %}
{{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
{%- endif %}
{{- '\n<parameters>' }}
{%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}
{%- for param_name, param_fields in tool.parameters.properties|items %}
{{- '\n<parameter>' }}
{{- '\n<name>' ~ param_name ~ '</name>' }}
{%- if param_fields.type is defined %}
{{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
{%- endif %}
{%- if param_fields.description is defined %}
{{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
{%- endif %}
{%- set handled_keys = ['name', 'type', 'description'] %}
{{- render_extra_keys(param_fields, handled_keys) }}
{{- '\n</parameter>' }}
{%- endfor %}
{%- endif %}
{% set handled_keys = ['type', 'properties'] %}
{{- render_extra_keys(tool.parameters, handled_keys) }}
{{- '\n</parameters>' }}
{%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}
{{- render_extra_keys(tool, handled_keys) }}
{{- '\n</function>' }}
{%- endfor %}
{{- "\n</tools>" }}
{{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
{%- endif %}
{%- if system_message is defined %}
{{- '<|im_end|>\n' }}
{%- else %}
{%- if tools is iterable and tools | length > 0 %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- for message in loop_messages %}
{%- if message.role == "assistant" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}
{{- '<|im_start|>' + message.role }}
{%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}
{{- '\n' + message.content | trim + '\n' }}
{%- endif %}
{%- for tool_call in message.tool_calls %}
{%- if tool_call.function is defined %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
{%- if tool_call.arguments is defined %}
{%- for args_name, args_value in tool_call.arguments|items %}
{{- '<parameter=' + args_name + '>\n' }}
{%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
{{- args_value }}
{{- '\n</parameter>\n' }}
{%- endfor %}
{%- endif %}
{{- '</function>\n</tool_call>' }}
{%- endfor %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "user" or message.role == "system" or message.role == "assistant" %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
{%- elif message.role == "tool" %}
{%- if loop.previtem and loop.previtem.role != "tool" %}
{{- '<|im_start|>user\n' }}
{%- endif %}
{{- '<tool_response>\n' }}
{{- message.content }}
{{- '\n</tool_response>\n' }}
{%- if not loop.last and loop.nextitem.role != "tool" %}
{{- '<|im_end|>\n' }}
{%- elif loop.last %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- endif %}

View File

@@ -0,0 +1,126 @@
{# Unsloth template fixes #}
{%- set available_tools_string = '' -%}
{%- set thought_instructions = '' -%}
{%- set add_tool_id = true -%}
{%- set tool_output_format = "default" -%}
{%- if tools is not none and tools|length > 0 -%}
{%- set available_tools_string -%}
You are provided with function signatures within <available_tools></available_tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about the arguments. You should infer the argument values from previous user responses and the system message. Here are the available tools:
<available_tools>
{% for tool in tools %}
{{ tool|string }}
{% endfor %}
</available_tools>
{%- endset -%}
{%- endif -%}
{%- if tool_output_format is none or tool_output_format == "default" -%}
{%- set tool_output_instructions -%}
Return all function calls as a list of json objects within <tool_call></tool_call> XML tags. Each json object should contain a function name and arguments as follows:
<tool_calls>[{"name": <function-name-1>, "arguments": <args-dict-1>}, {"name": <function-name-2>, "arguments": <args-dict-2>},...]</tool_calls>
{%- endset -%}
{%- elif tool_output_format == "yaml" -%}
{%- set tool_output_instructions -%}
Return all function calls as a list of yaml objects within <tool_call></tool_call> XML tags. Each yaml object should contain a function name and arguments as follows:
<tool_calls>
- name: <function-name-1>
arguments: <args-dict-1>
- name: <function-name-2>
arguments: <args-dict-2>
...
</tool_calls>
{%- endset -%}
{%- endif -%}
{%- if add_thoughts -%}
{%- set thought_instructions -%}
Prior to generating the function calls, you should generate the reasoning for why you're calling the function. Please generate these reasoning thoughts between <thinking> and </thinking> XML tags.
{%- endset -%}
{%- endif -%}
{{- bos_token -}}
{%- set reasoning_prompt='You are a thoughtful and systematic AI assistant built by ServiceNow Language Models (SLAM) lab. Before providing an answer, analyze the problem carefully and present your reasoning step by step. After explaining your thought process, provide the final solution in the following format: [BEGIN FINAL RESPONSE] ... [END FINAL RESPONSE].' -%}
{%- if messages[0]['role'] != 'system' and tools is not none and tools|length > 0 -%}
{{- '<|system|>\n' + reasoning_prompt + available_tools_string + "\n" + tool_output_instructions + '\n<|end|>\n' -}}
{%- endif -%}
{%- if messages|selectattr('role', 'equalto', 'system')|list|length == 0 -%}
{{- '<|system|>\n' + reasoning_prompt + '\n<|end|>\n' -}}
{%- endif -%}
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
{{- '<|user|>\n' }}
{%- if message['content'] is not string %}
{%- for chunk in message['content'] %}
{%- if chunk['type'] == 'text' %}
{{- chunk['text'] }}
{%- elif chunk['type'] == 'image' or chunk['type'] == 'image_url'%}
{{- '[IMG]' }}
{%- else %}
{{- raise_exception('Unrecognized content type!') }}
{%- endif %}
{%- endfor %}
{%- else %}
{{- message['content'] }}
{%- endif %}
{{- '\n<|end|>\n' }}
{%- elif message['role'] == 'content' -%}
{%- if message['content'] is not string %}
{{- '<|content|>\n' + message['content'][0]['text'] + '\n<|end|>\n' -}}
{%- else %}
{{- '<|content|>\n' + message['content'] + '\n<|end|>\n' -}}
{%- endif -%}
{%- elif message['role'] == 'system' -%}
{%- if message['content'] is not none and message['content']|length > 0 %}
{%- if message['content'] is string %}
{%- set system_message = message['content'] %}
{%- else %}
{%- set system_message = message['content'][0]['text'] %}
{%- endif %}
{%- else %}
{%- set system_message = '' %}
{%- endif %}
{%- if tools is not none and tools|length > 0 -%}
{{- '<|system|>\n' + reasoning_prompt + system_message + '\n' + available_tools_string + '\n<|end|>\n' -}}
{%- else -%}
{{- '<|system|>\n' + reasoning_prompt + system_message + '\n<|end|>\n' -}}
{%- endif -%}
{%- elif message['role'] == 'assistant' -%}
{%- if loop.last -%}
{%- set add_tool_id = false -%}
{%- endif -%}
{{- '<|assistant|>\n' -}}
{%- if message['content'] is not none and message['content']|length > 0 -%}
{%- if message['content'] is not string and message['content'][0]['text'] is not none %}
{{- message['content'][0]['text'] }}
{%- else %}
{{- message['content'] -}}
{%- endif -%}
{%- elif message['chosen'] is not none and message['chosen']|length > 0 -%}
{{- message['chosen'][0] -}}
{%- endif -%}
{%- if add_thoughts and 'thought' in message and message['thought'] is not none -%}
{{- '<thinking>' + message['thought'] + '</thinking>' -}}
{%- endif -%}
{%- if message['tool_calls'] is not none and message['tool_calls']|length > 0 -%}
{{- '\n<tool_calls>[' -}}
{%- for tool_call in message["tool_calls"] -%}
{{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|string -}}
{%- if add_tool_id == true -%}
{{- ', "id": "' + tool_call['id'] + '"' -}}
{%- endif -%}
{{- '}' -}}
{%- if not loop.last -%}{{- ', ' -}}{%- endif -%}
{%- endfor -%}
{{- ']</tool_calls>' -}}
{%- endif -%}
{{- '\n<|end|>\n' + eos_token -}}
{%- elif message['role'] == 'tool' -%}
{%- if message['content'] is string %}
{%- set tool_message = message['content'] %}
{%- else %}
{%- set tool_message = message['content'][0]['text'] %}
{%- endif -%}
{{- '<|tool_result|>\n' + tool_message|string + '\n<|end|>\n' -}}
{%- endif -%}
{%- if loop.last and add_generation_prompt and message['role'] != 'assistant' -%}
{{- '<|assistant|>\n' -}}
{%- endif -%}
{%- endfor -%}
{# Copyright 2025-present Unsloth. Apache 2.0 License. #}