mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-22 22:24:11 +00:00
Refactor chat and server file (#1062)
* Add alternative log functions
* chat: fix int overflow, prevent size calculation in float/double (#17357)
  * chat: fix int overflow, prevent size calculation in float/double
  * Update common/chat.cpp
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
  ---------
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* common : move all common_chat_parse_* to chat-parser.cpp. (#17481)
  # Conflicts:
  #   common/chat.cpp
* server: split server.cpp code into server/common/task/queue/context
* Fix compiler warning
* Clean up code
* common: use native MultiByteToWideChar
* move server prompt to server task
* Clean code
* delete utils.hpp
---------
Co-authored-by: firecoperana <firecoperana>
Co-authored-by: Xuan-Son Nguyen <son@huggingface.co>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: DAN™ <dranger003@gmail.com>
This commit is contained in:
@@ -2726,11 +2726,29 @@ bool fs_validate_filename(const std::string & filename) {
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
// Converts a UTF-8 encoded string to a wide (UTF-16) string using the native
// Win32 MultiByteToWideChar API.
//
// Returns an empty wstring when the input is empty, when its byte length does
// not fit in the int that the API accepts, or when the conversion fails.
static std::wstring utf8_to_wstring(const std::string& str) {
    // MultiByteToWideChar takes the byte count as an int. Reject inputs whose
    // size does not survive the round trip instead of passing a truncated
    // (possibly negative) length to the API.
    const int len = static_cast<int>(str.size());
    if (len <= 0 || static_cast<size_t>(len) != str.size()) {
        return std::wstring();
    }

    // First call: query how many wide characters the result needs.
    const int size = MultiByteToWideChar(CP_UTF8, 0, str.data(), len, nullptr, 0);
    if (size <= 0) {
        return std::wstring();
    }

    // Second call: convert into the pre-sized buffer.
    std::wstring wstr(static_cast<size_t>(size), L'\0');
    const int written = MultiByteToWideChar(CP_UTF8, 0, str.data(), len, &wstr[0], size);
    if (written <= 0) {
        // The conversion itself failed; do not hand back a zero-filled buffer.
        return std::wstring();
    }

    // Keep only what was actually written (normally identical to `size`).
    wstr.resize(static_cast<size_t>(written));
    return wstr;
}
|
||||
#endif
|
||||
|
||||
// returns true if successful, false otherwise
|
||||
bool fs_create_directory_with_parents(const std::string & path) {
|
||||
#ifdef _WIN32
|
||||
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
|
||||
std::wstring wpath = converter.from_bytes(path);
|
||||
std::wstring wpath = utf8_to_wstring(path);
|
||||
|
||||
// if the path already exists, check whether it's a directory
|
||||
const DWORD attributes = GetFileAttributesW(wpath.c_str());
|
||||
@@ -3586,175 +3604,6 @@ bool llama_should_add_bos_token(const llama_model * model) {
|
||||
return add_bos != -1 ? bool(add_bos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
|
||||
}
|
||||
|
||||
//
|
||||
// Chat template utils
|
||||
//
|
||||
//
|
||||
//bool llama_chat_verify_template(const struct llama_model* model, const std::string& tmpl, bool use_jinja) {
|
||||
// if (use_jinja) {
|
||||
// try {
|
||||
// auto chat_template = common_chat_template(tmpl, "<s>", "</s>");
|
||||
// common_chat_inputs inputs;
|
||||
// inputs.messages = json::array({ {
|
||||
// {"role", "user"},
|
||||
// {"content", "test"},
|
||||
// } });
|
||||
// common_chat_params_init(chat_template, inputs);
|
||||
// return true;
|
||||
// }
|
||||
// catch (const std::exception& e) {
|
||||
// fprintf(stdout,"%s: failed to apply template: %s\n", __func__, e.what());
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
// llama_chat_message chat[] = { {"user", "test"} };
|
||||
// const int res = llama_chat_apply_template(model, tmpl.c_str(), chat, 1, true, nullptr, 0);
|
||||
// return res >= 0;
|
||||
//}
|
||||
|
||||
//std::string llama_chat_apply_template(const struct llama_model * model,
|
||||
// const common_chat_template& tmpl,
|
||||
// const std::vector<common_chat_msg> & msgs,
|
||||
// bool add_ass,
|
||||
// bool use_jinja) {
|
||||
// if (use_jinja) {
|
||||
// auto messages = json::array();
|
||||
// for (const auto& msg : msgs) {
|
||||
// messages.push_back({ {"role", msg.role}, {"content", msg.content} });
|
||||
// }
|
||||
// common_chat_inputs inputs;
|
||||
// inputs.messages = messages;
|
||||
// inputs.add_generation_prompt = add_ass;
|
||||
// return common_chat_params_init(tmpl, inputs).prompt;
|
||||
// }
|
||||
// int alloc_size = 0;
|
||||
// std::vector<llama_chat_message> chat;
|
||||
// for (auto & msg : msgs) {
|
||||
// chat.push_back({msg.role.c_str(), msg.content.c_str()});
|
||||
// alloc_size += (msg.role.size() + msg.content.size()) * 1.25;
|
||||
// }
|
||||
//
|
||||
// std::vector<char> buf(alloc_size);
|
||||
//
|
||||
// // run the first time to get the total output length
|
||||
// int32_t res = llama_chat_apply_template(model, tmpl.source().c_str(), chat.data(), chat.size(), add_ass, buf.data(), buf.size());
|
||||
// // error: chat template is not supported
|
||||
// if (res < 0) {
|
||||
// // if the custom "tmpl" is not supported, we throw an error
|
||||
// // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template()
|
||||
// throw std::runtime_error("this custom template is not supported");
|
||||
// }
|
||||
//
|
||||
// // if it turns out that our buffer is too small, we resize it
|
||||
// if ((size_t)res > buf.size()) {
|
||||
// buf.resize(res);
|
||||
// res = llama_chat_apply_template(model, tmpl.source().c_str(), chat.data(), chat.size(), add_ass, buf.data(), buf.size());
|
||||
// }
|
||||
//
|
||||
// std::string formatted_chat(buf.data(), res);
|
||||
// return formatted_chat;
|
||||
//}
|
||||
////
|
||||
//std::string llama_chat_format_single(const struct llama_model * model,
|
||||
// const common_chat_template& tmpl,
|
||||
// const std::vector<common_chat_msg> & past_msg,
|
||||
// const common_chat_msg & new_msg,
|
||||
// bool add_ass,
|
||||
// bool use_jinja) {
|
||||
// std::ostringstream ss;
|
||||
// auto fmt_past_msg = past_msg.empty() ? "" : llama_chat_apply_template(model, tmpl, past_msg, false, use_jinja);
|
||||
// std::vector<common_chat_msg> chat_new(past_msg);
|
||||
// // if the past_msg ends with a newline, we must preserve it in the formatted version
|
||||
// if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') {
|
||||
// ss << "\n";
|
||||
// };
|
||||
// // format chat with new_msg
|
||||
// chat_new.push_back(new_msg);
|
||||
// auto fmt_new_msg = llama_chat_apply_template(model, tmpl, chat_new, add_ass, use_jinja);
|
||||
// // get the diff part
|
||||
// ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
|
||||
// return ss.str();
|
||||
//}
|
||||
|
||||
//std::string llama_chat_format_example(const struct llama_model * model, const common_chat_template& tmpl, bool use_jinja) {
|
||||
// std::vector<common_chat_msg> msgs = {
|
||||
// {"system", "You are a helpful assistant", {}},
|
||||
// {"user", "Hello", {}},
|
||||
// {"assistant", "Hi there", {}},
|
||||
// {"user", "How are you?", {}},
|
||||
// };
|
||||
// return llama_chat_apply_template(model, tmpl, msgs, true, use_jinja);
|
||||
//}
|
||||
//
|
||||
//#define CHATML_TEMPLATE_SRC \
|
||||
// "{%- for message in messages -%}\n" \
|
||||
// " {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \
|
||||
// "{%- endfor -%}\n" \
|
||||
// "{%- if add_generation_prompt -%}\n" \
|
||||
// " {{- '<|im_start|>assistant\n' -}}\n" \
|
||||
// "{%- endif -%}"
|
||||
//
|
||||
//common_chat_templates llama_chat_templates_from_model(const struct llama_model* model, const std::string& chat_template_override)
|
||||
//{
|
||||
// std::string default_template_src;
|
||||
// std::string template_tool_use_src;
|
||||
// bool has_explicit_template = !chat_template_override.empty();
|
||||
// if (chat_template_override.empty()) {
|
||||
// auto str = llama_model_chat_template(model, /* name */ nullptr);
|
||||
// if (str) {
|
||||
// default_template_src = str;
|
||||
// has_explicit_template = true;
|
||||
// }
|
||||
// str = llama_model_chat_template(model, /* name */ "tool_use");
|
||||
// if (str) {
|
||||
// template_tool_use_src = str;
|
||||
// has_explicit_template = true;
|
||||
// }
|
||||
// }
|
||||
// else {
|
||||
// default_template_src = chat_template_override;
|
||||
// }
|
||||
// if (default_template_src.empty() || default_template_src == "chatml") {
|
||||
// if (!template_tool_use_src.empty()) {
|
||||
// default_template_src = template_tool_use_src;
|
||||
// }
|
||||
// else {
|
||||
// default_template_src = CHATML_TEMPLATE_SRC;
|
||||
// }
|
||||
// }
|
||||
// auto vocab = llama_model_get_vocab(model);
|
||||
// const auto get_token = [&](llama_token token, const char* name, const char* jinja_variable_name) {
|
||||
// if (token == LLAMA_TOKEN_NULL) {
|
||||
// if (default_template_src.find(jinja_variable_name) != std::string::npos
|
||||
// || template_tool_use_src.find(jinja_variable_name) != std::string::npos) {
|
||||
// fprintf(stdout, "%s: warning: vocab does not have a %s token, jinja template won't work as intended.\n", __func__, name);
|
||||
// }
|
||||
// return std::string();
|
||||
// }
|
||||
// else {
|
||||
// return llama_token_to_piece(model, token, true);
|
||||
// }
|
||||
// };
|
||||
// auto token_bos = get_token(llama_token_bos_impl(*vocab), "BOS", "bos_token");
|
||||
// auto token_eos = get_token(llama_token_eos_impl(*vocab), "EOS", "eos_token");
|
||||
// try {
|
||||
// return {
|
||||
// has_explicit_template,
|
||||
// std::make_unique<minja::chat_template>(default_template_src, token_bos, token_eos),
|
||||
// template_tool_use_src.empty()
|
||||
// ? nullptr
|
||||
// : std::make_unique<minja::chat_template>(template_tool_use_src, token_bos, token_eos),
|
||||
// };
|
||||
// }
|
||||
// catch (const std::exception& e) {
|
||||
// LOG("%s: failed to parse chat template: %s\n", __func__, e.what());
|
||||
// return {
|
||||
// has_explicit_template,
|
||||
// std::make_unique<minja::chat_template>(CHATML_TEMPLATE_SRC, token_bos, token_eos),
|
||||
// nullptr,
|
||||
// };
|
||||
// }
|
||||
//}
|
||||
|
||||
//
|
||||
// KV cache utils
|
||||
|
||||
Reference in New Issue
Block a user