Mirror of https://github.com/ikawrakow/ik_llama.cpp.git, synced 2026-01-26 17:20:01 +00:00.
add jinja template support (#677)
Co-authored-by: firecoperana <firecoperana>
@@ -228,6 +228,7 @@ struct gpt_params {
     std::string hostname = "127.0.0.1";
     std::string public_path = "";
     std::string chat_template = "";
+    bool use_jinja = false; // NOLINT
     std::string system_prompt = "";
     bool enable_chat_template = true;
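For context, a minimal sketch of how the new use_jinja toggle might be wired up from the command line; the "--jinja" flag name, the struct name, and the parsing helper below are illustrative assumptions, not taken from this commit:

    // Hypothetical sketch (not from the commit): wiring a command-line switch to the
    // new use_jinja field shown in the hunk above.
    #include <cstring>
    #include <string>

    struct gpt_params_sketch {
        std::string chat_template = "";
        bool use_jinja = false; // mirrors the field added above
    };

    static void parse_args_sketch(int argc, char ** argv, gpt_params_sketch & params) {
        for (int i = 1; i < argc; ++i) {
            if (std::strcmp(argv[i], "--jinja") == 0) {
                params.use_jinja = true;          // render chat templates with the Jinja engine
            } else if (std::strcmp(argv[i], "--chat-template") == 0 && i + 1 < argc) {
                params.chat_template = argv[++i]; // optional template override
            }
        }
    }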
@@ -400,6 +401,11 @@ std::string llama_token_to_piece(
         llama_token token,
         bool special = true);

+std::string llama_token_to_piece(
+        const struct llama_model* model,
+        llama_token token,
+        bool special = true);
+
 // detokenizes a vector of tokens into a string
 // should work similar to Python's `tokenizer.decode`
 // optionally renders special/control tokens
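A small usage sketch of the new model-based llama_token_to_piece overload, assuming these declarations live in the fork's common.h (as in upstream llama.cpp) and that a model has already been loaded:

    // Sketch: detokenizing a single token without a llama_context, using the
    // model-based overload declared above. Header name and setup are assumptions.
    #include "common.h"

    #include <cstdio>
    #include <string>

    static void print_piece(const struct llama_model * model, llama_token token) {
        // special=true renders special/control tokens (e.g. BOS/EOS) as text
        const std::string piece = llama_token_to_piece(model, token, /*special=*/true);
        std::printf("token %d -> '%s'\n", token, piece.c_str());
    }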
@@ -423,26 +429,45 @@ struct llama_chat_msg {
 };

 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
-bool llama_chat_verify_template(const std::string & tmpl);
+bool llama_chat_verify_template(const struct llama_model* , const std::string& tmpl, bool use_jinja);
+
+namespace minja {
+    class chat_template;
+}
+
+typedef minja::chat_template common_chat_template;
+
+struct common_chat_templates {
+    bool has_explicit_template; // Model had builtin template or template overridde was specified.
+    std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
+    std::unique_ptr<common_chat_template> template_tool_use;
+};
+

 // CPP wrapper for llama_chat_apply_template
 // If the built-in template is not supported, we default to chatml
 // If the custom "tmpl" is not supported, we throw an error
-std::string llama_chat_apply_template(const struct llama_model * model,
-        const std::string & tmpl,
-        const std::vector<llama_chat_msg> & chat,
-        bool add_ass);
+std::string llama_chat_apply_template(
+        const struct llama_model* model,
+        const common_chat_template& tmpl,
+        const std::vector< llama_chat_msg>& chat,
+        bool add_ass,
+        bool use_jinja);

 // Format single message, while taking into account the position of that message in chat history
-std::string llama_chat_format_single(const struct llama_model * model,
-        const std::string & tmpl,
-        const std::vector<llama_chat_msg> & past_msg,
-        const llama_chat_msg & new_msg,
-        bool add_ass);
+std::string llama_chat_format_single(const struct llama_model* model,
+        const common_chat_template& tmpl,
+        const std::vector< llama_chat_msg>& past_msg,
+        const llama_chat_msg& new_msg,
+        bool add_ass,
+        bool use_jinja);

 // Returns an example of formatted chat
-std::string llama_chat_format_example(const struct llama_model * model,
-        const std::string & tmpl);
+std::string llama_chat_format_example(const struct llama_model* model,
+        const common_chat_template& tmpl, bool use_jinja);
+
+common_chat_templates llama_chat_templates_from_model(const struct llama_model* model, const std::string& chat_template_override);
+

 //
 // KV cache utils
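To show how the new pieces are intended to fit together, here is a hedged usage sketch inferred purely from the declarations above: it loads the model's templates with llama_chat_templates_from_model, then formats a short conversation through llama_chat_apply_template, with use_jinja selecting the Jinja path. It assumes llama_chat_msg carries a role and a content string (as in upstream llama.cpp), that the minja header is named chat-template.hpp, and that a model is already loaded; the real control flow lives in common.cpp and may differ:

    // Usage sketch only, inferred from the declarations above (not from common.cpp).
    #include "common.h"
    // common.h only forward-declares minja::chat_template, so code that owns a
    // common_chat_templates value likely needs the minja header too (name assumed).
    #include "chat-template.hpp"

    #include <string>
    #include <vector>

    static std::string render_chat(const struct llama_model * model, bool use_jinja) {
        // Empty override string -> use the template embedded in the model, if any.
        common_chat_templates tmpls =
            llama_chat_templates_from_model(model, /*chat_template_override=*/"");

        const std::vector<llama_chat_msg> chat = {
            { "system", "You are a helpful assistant." },
            { "user",   "Hello!" },
        };

        // template_default is documented above as always set (defaults to chatml).
        return llama_chat_apply_template(model, *tmpls.template_default, chat,
                                         /*add_ass=*/true, use_jinja);
    }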
@@ -502,3 +527,5 @@ void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const cha
 void yaml_dump_non_result_info(
     FILE * stream, const gpt_params & params, const llama_context * lctx,
     const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
+
+std::string string_format(const char* fmt, ...);
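string_format is a printf-style helper that returns a std::string; a trivial usage sketch, assuming the implementation lives in common.cpp as in upstream llama.cpp:

    // Trivial sketch of the printf-style helper declared above.
    #include "common.h"

    #include <string>

    static std::string describe(int n_tokens, double t_ms) {
        // formats directly into a std::string instead of a fixed buffer
        return string_format("processed %d tokens in %.2f ms", n_tokens, t_ms);
    }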