Refactor chat and server file (#1062)

* Add alternative log functions

* chat: fix int overflow, prevent size calculation in float/double (#17357)

* chat: fix int overflow, prevent size calculation in float/double

* Update common/chat.cpp

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* common : move all common_chat_parse_* to chat-parser.cpp. (#17481)

# Conflicts:
#	common/chat.cpp

* server: split server.cpp code into server/common/task/queue/context

* Fix compiler warning

* Clean up code

* common: use native MultiByteToWideChar

* move server prompt to server task

* Clean code

* delete utils.hpp

---------

Co-authored-by: firecoperana <firecoperana>
Co-authored-by: Xuan-Son Nguyen <son@huggingface.co>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: DAN™ <dranger003@gmail.com>
This commit is contained in:
firecoperana
2025-12-15 01:27:20 -06:00
committed by GitHub
parent 7b03c9dcef
commit 0e91b89cd3
20 changed files with 6849 additions and 5613 deletions

View File

@@ -2726,11 +2726,29 @@ bool fs_validate_filename(const std::string & filename) {
return true;
}
#ifdef _WIN32
// Convert a UTF-8 encoded string to UTF-16 using the native Win32 API
// (replaces the deprecated-in-C++17 std::wstring_convert/codecvt path).
// Returns an empty wstring on empty input or on any conversion failure.
static std::wstring utf8_to_wstring(const std::string& str) {
    if (str.empty()) {
        return std::wstring();
    }
    // MultiByteToWideChar takes the source length as an int; reject inputs
    // whose size would overflow that cast (int is 32-bit on Windows).
    const size_t max_int = 2147483647; // INT_MAX on Windows
    if (str.size() > max_int) {
        return std::wstring();
    }
    // first call: query the required number of wide characters
    int size = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), NULL, 0);
    if (size <= 0) {
        return std::wstring();
    }
    std::wstring wstr(size, 0);
    // second call: perform the conversion; treat failure as an empty result
    // rather than returning a zero-filled buffer
    if (MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), &wstr[0], size) <= 0) {
        return std::wstring();
    }
    return wstr;
}
#endif
// returns true if successful, false otherwise
bool fs_create_directory_with_parents(const std::string & path) {
#ifdef _WIN32
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
std::wstring wpath = converter.from_bytes(path);
std::wstring wpath = utf8_to_wstring(path);
// if the path already exists, check whether it's a directory
const DWORD attributes = GetFileAttributesW(wpath.c_str());
@@ -3586,175 +3604,6 @@ bool llama_should_add_bos_token(const llama_model * model) {
return add_bos != -1 ? bool(add_bos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
}
//
// Chat template utils
//
// NOTE(review): the commented-out implementations below appear to have been
// superseded by common/chat.cpp / chat-parser.cpp in this refactor and are
// kept here only for reference — confirm before deleting.
//
//bool llama_chat_verify_template(const struct llama_model* model, const std::string& tmpl, bool use_jinja) {
// if (use_jinja) {
// try {
// auto chat_template = common_chat_template(tmpl, "<s>", "</s>");
// common_chat_inputs inputs;
// inputs.messages = json::array({ {
// {"role", "user"},
// {"content", "test"},
// } });
// common_chat_params_init(chat_template, inputs);
// return true;
// }
// catch (const std::exception& e) {
// fprintf(stdout,"%s: failed to apply template: %s\n", __func__, e.what());
// return false;
// }
// }
// llama_chat_message chat[] = { {"user", "test"} };
// const int res = llama_chat_apply_template(model, tmpl.c_str(), chat, 1, true, nullptr, 0);
// return res >= 0;
//}
//std::string llama_chat_apply_template(const struct llama_model * model,
// const common_chat_template& tmpl,
// const std::vector<common_chat_msg> & msgs,
// bool add_ass,
// bool use_jinja) {
// if (use_jinja) {
// auto messages = json::array();
// for (const auto& msg : msgs) {
// messages.push_back({ {"role", msg.role}, {"content", msg.content} });
// }
// common_chat_inputs inputs;
// inputs.messages = messages;
// inputs.add_generation_prompt = add_ass;
// return common_chat_params_init(tmpl, inputs).prompt;
// }
// int alloc_size = 0;
// std::vector<llama_chat_message> chat;
// for (auto & msg : msgs) {
// chat.push_back({msg.role.c_str(), msg.content.c_str()});
// alloc_size += (msg.role.size() + msg.content.size()) * 1.25;
// }
//
// std::vector<char> buf(alloc_size);
//
// // run the first time to get the total output length
// int32_t res = llama_chat_apply_template(model, tmpl.source().c_str(), chat.data(), chat.size(), add_ass, buf.data(), buf.size());
// // error: chat template is not supported
// if (res < 0) {
// // if the custom "tmpl" is not supported, we throw an error
// // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template()
// throw std::runtime_error("this custom template is not supported");
// }
//
// // if it turns out that our buffer is too small, we resize it
// if ((size_t)res > buf.size()) {
// buf.resize(res);
// res = llama_chat_apply_template(model, tmpl.source().c_str(), chat.data(), chat.size(), add_ass, buf.data(), buf.size());
// }
//
// std::string formatted_chat(buf.data(), res);
// return formatted_chat;
//}
////
//std::string llama_chat_format_single(const struct llama_model * model,
// const common_chat_template& tmpl,
// const std::vector<common_chat_msg> & past_msg,
// const common_chat_msg & new_msg,
// bool add_ass,
// bool use_jinja) {
// std::ostringstream ss;
// auto fmt_past_msg = past_msg.empty() ? "" : llama_chat_apply_template(model, tmpl, past_msg, false, use_jinja);
// std::vector<common_chat_msg> chat_new(past_msg);
// // if the past_msg ends with a newline, we must preserve it in the formatted version
// if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') {
// ss << "\n";
// };
// // format chat with new_msg
// chat_new.push_back(new_msg);
// auto fmt_new_msg = llama_chat_apply_template(model, tmpl, chat_new, add_ass, use_jinja);
// // get the diff part
// ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
// return ss.str();
//}
//std::string llama_chat_format_example(const struct llama_model * model, const common_chat_template& tmpl, bool use_jinja) {
// std::vector<common_chat_msg> msgs = {
// {"system", "You are a helpful assistant", {}},
// {"user", "Hello", {}},
// {"assistant", "Hi there", {}},
// {"user", "How are you?", {}},
// };
// return llama_chat_apply_template(model, tmpl, msgs, true, use_jinja);
//}
//
//#define CHATML_TEMPLATE_SRC \
// "{%- for message in messages -%}\n" \
// " {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \
// "{%- endfor -%}\n" \
// "{%- if add_generation_prompt -%}\n" \
// " {{- '<|im_start|>assistant\n' -}}\n" \
// "{%- endif -%}"
//
//common_chat_templates llama_chat_templates_from_model(const struct llama_model* model, const std::string& chat_template_override)
//{
// std::string default_template_src;
// std::string template_tool_use_src;
// bool has_explicit_template = !chat_template_override.empty();
// if (chat_template_override.empty()) {
// auto str = llama_model_chat_template(model, /* name */ nullptr);
// if (str) {
// default_template_src = str;
// has_explicit_template = true;
// }
// str = llama_model_chat_template(model, /* name */ "tool_use");
// if (str) {
// template_tool_use_src = str;
// has_explicit_template = true;
// }
// }
// else {
// default_template_src = chat_template_override;
// }
// if (default_template_src.empty() || default_template_src == "chatml") {
// if (!template_tool_use_src.empty()) {
// default_template_src = template_tool_use_src;
// }
// else {
// default_template_src = CHATML_TEMPLATE_SRC;
// }
// }
// auto vocab = llama_model_get_vocab(model);
// const auto get_token = [&](llama_token token, const char* name, const char* jinja_variable_name) {
// if (token == LLAMA_TOKEN_NULL) {
// if (default_template_src.find(jinja_variable_name) != std::string::npos
// || template_tool_use_src.find(jinja_variable_name) != std::string::npos) {
// fprintf(stdout, "%s: warning: vocab does not have a %s token, jinja template won't work as intended.\n", __func__, name);
// }
// return std::string();
// }
// else {
// return llama_token_to_piece(model, token, true);
// }
// };
// auto token_bos = get_token(llama_token_bos_impl(*vocab), "BOS", "bos_token");
// auto token_eos = get_token(llama_token_eos_impl(*vocab), "EOS", "eos_token");
// try {
// return {
// has_explicit_template,
// std::make_unique<minja::chat_template>(default_template_src, token_bos, token_eos),
// template_tool_use_src.empty()
// ? nullptr
// : std::make_unique<minja::chat_template>(template_tool_use_src, token_bos, token_eos),
// };
// }
// catch (const std::exception& e) {
// LOG("%s: failed to parse chat template: %s\n", __func__, e.what());
// return {
// has_explicit_template,
// std::make_unique<minja::chat_template>(CHATML_TEMPLATE_SRC, token_bos, token_eos),
// nullptr,
// };
// }
//}
//
// KV cache utils