Tool calls support from mainline (#723)

* Tool calls support from mainline
* update cmake
* revert api for /completions
* Fix broken thinking process for gpt-oss
* add missing args and fix webui bugs
* add missing args and fix webui bugs2
* Fix reasoning format error
* add usage
* change default post_sampling_probs to true
* add back generated_text
* Remove server endpoints tests
* add log
* Chat fixes
* Remove logs
* webui: revert extra handling of thinking process

---------

Co-authored-by: firecoperana <firecoperana>
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
2026-04-28 18:32:04 +00:00 · 2025-09-01 00:38:49 -05:00
parent b66cecca45
commit 0f9ecaec04
87 changed files with 13581 additions and 2224 deletions
--- a/scripts/get_hf_chat_template.py
+++ b/scripts/get_hf_chat_template.py
@@ -4,12 +4,11 @@
  If a model has multiple chat templates, you can specify the variant name.

  Syntax:
-    ./scripts/get_hf_chat_template.py model_id [variant]
+    ./scripts/get_hf_chat_template.py model_id [variant]

  Examples:
-    ./scripts/get_hf_chat_template.py NousResearch/Meta-Llama-3-8B-Instruct
-    ./scripts/get_hf_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use
-    ./scripts/get_hf_chat_template.py meta-llama/Llama-3.2-3B-Instruct
+    ./scripts/get_hf_chat_template.py CohereForAI/c4ai-command-r-plus tool_use
+    ./scripts/get_hf_chat_template.py microsoft/Phi-3.5-mini-instruct
 '''

 import json
@@ -17,7 +16,7 @@ import re
 import sys


-def get_hf_chat_template(model_id, variant=None):
+def get_chat_template(model_id, variant=None):
    try:
        # Use huggingface_hub library if available.
        # Allows access to gated models if the user has access and ran `huggingface-cli login`.
@@ -69,7 +68,7 @@ def main(args):
    model_id = args[0]
    variant = None if len(args) < 2 else args[1]

-    template = get_hf_chat_template(model_id, variant)
+    template = get_chat_template(model_id, variant)
    sys.stdout.write(template)