add jinja template support (#677)

Co-authored-by: firecoperana <firecoperana>
Author:    firecoperana
Date:      2025-08-09 07:50:30 -05:00
Committer: GitHub
Parent:    e23b2a7cc9
Commit:    ff024df079

14 changed files with 3872 additions and 129 deletions


@@ -228,6 +228,7 @@ struct gpt_params {
std::string hostname = "127.0.0.1";
std::string public_path = "";
std::string chat_template = "";
bool use_jinja = false; // NOLINT
std::string system_prompt = "";
bool enable_chat_template = true;
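The first hunk adds a single field, use_jinja, defaulting to off so existing behaviour is unchanged. A minimal sketch of how the flag could be toggled from the command line; the option name "--jinja" and the surrounding parse loop are assumptions, not shown in this diff:

    // Hypothetical CLI wiring for the new flag; "--jinja" is an assumed option name.
    if (arg == "--jinja") {
        params.use_jinja = true;   // switch chat formatting to the jinja path
        return true;
    }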
@@ -400,6 +401,11 @@ std::string llama_token_to_piece(
llama_token token,
bool special = true);
std::string llama_token_to_piece(
const struct llama_model* model,
llama_token token,
bool special = true);
// detokenizes a vector of tokens into a string
// should work similar to Python's `tokenizer.decode`
// optionally renders special/control tokens
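This hunk adds a model-based overload of llama_token_to_piece alongside the existing one, so a token can be rendered to text without a llama_context. A rough usage sketch, assuming `model` is an already-loaded llama_model pointer and the token id is valid:

    // Sketch: detokenize a single token straight from the model's vocabulary.
    const llama_token tok = 42;                                  // placeholder token id
    std::string piece = llama_token_to_piece(model, tok, true);  // also render special tokens
    fprintf(stderr, "token %d -> '%s'\n", tok, piece.c_str());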
@@ -423,26 +429,45 @@ struct llama_chat_msg {
};
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
bool llama_chat_verify_template(const std::string & tmpl);
bool llama_chat_verify_template(const struct llama_model *, const std::string & tmpl, bool use_jinja);
namespace minja {
class chat_template;
}
typedef minja::chat_template common_chat_template;
struct common_chat_templates {
bool has_explicit_template; // Model had a built-in template, or a template override was specified.
std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
std::unique_ptr<common_chat_template> template_tool_use;
};
// CPP wrapper for llama_chat_apply_template
// If the built-in template is not supported, we default to chatml
// If the custom "tmpl" is not supported, we throw an error
std::string llama_chat_apply_template(const struct llama_model * model,
const std::string & tmpl,
const std::vector<llama_chat_msg> & chat,
bool add_ass);
std::string llama_chat_apply_template(
const struct llama_model* model,
const common_chat_template& tmpl,
const std::vector<llama_chat_msg> & chat,
bool add_ass,
bool use_jinja);
// Format single message, while taking into account the position of that message in chat history
std::string llama_chat_format_single(const struct llama_model * model,
const std::string & tmpl,
const std::vector<llama_chat_msg> & past_msg,
const llama_chat_msg & new_msg,
bool add_ass);
std::string llama_chat_format_single(const struct llama_model* model,
const common_chat_template& tmpl,
const std::vector<llama_chat_msg> & past_msg,
const llama_chat_msg& new_msg,
bool add_ass,
bool use_jinja);
// Returns an example of formatted chat
std::string llama_chat_format_example(const struct llama_model * model,
const std::string & tmpl);
std::string llama_chat_format_example(const struct llama_model* model,
const common_chat_template& tmpl, bool use_jinja);
common_chat_templates llama_chat_templates_from_model(const struct llama_model* model, const std::string& chat_template_override);
//
// KV cache utils
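Read together, the new declarations suggest this flow: build the model's templates once with llama_chat_templates_from_model, then format each conversation through llama_chat_apply_template, with use_jinja selecting between the legacy formatter and the minja engine. A rough sketch under the assumption that llama_chat_msg is an aggregate of a role and a content string (its fields are not shown in this hunk), with error handling omitted:

    // Sketch only; `model` and `params` are assumed to exist in the caller.
    common_chat_templates tmpls = llama_chat_templates_from_model(model, params.chat_template);

    std::vector<llama_chat_msg> chat = {
        { "system", "You are a helpful assistant." },
        { "user",   "Hello!" },
    };

    std::string prompt = llama_chat_apply_template(
        model, *tmpls.template_default, chat, /*add_ass=*/true, params.use_jinja);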
@@ -502,3 +527,5 @@ void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const cha
void yaml_dump_non_result_info(
FILE * stream, const gpt_params & params, const llama_context * lctx,
const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
std::string string_format(const char* fmt, ...);
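Judging from the declaration alone, string_format appears to be a printf-style helper that returns a std::string; the exact buffer handling is not visible in this diff. A guessed usage:

    // Assumes printf-style semantics, inferred from the variadic declaration above.
    std::string line = string_format("chat template: %s (jinja=%d)",
                                     params.chat_template.c_str(), params.use_jinja ? 1 : 0);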