Port universal assisted decoding to llama-server (#699)

* port universal assisted decoding to server

* fix calls

* fix LOG_INFO

* fix llama_detokenize call

* use emplace_back

g2mt authored 2025-08-17 23:22:23 -07:00, committed by GitHub
parent 6b2c84b099
commit 06bed7e01b
5 changed files with 160 additions and 55 deletions


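Background (not part of the commit message): universal assisted decoding allows the draft model used for speculation to have a different tokenizer than the target model, so text has to be moved between the two vocabularies as strings, and the --spec-replace pairs parsed below describe substitutions to apply during that conversion. The following standalone sketch shows one way such (target, draft) pairs could be applied to text before re-tokenizing it for the draft model; apply_replacements and the example mapping are illustrative, not code from this repository.

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Hypothetical helper: apply every (target, draft) pair to a piece of text.
static std::string apply_replacements(
        std::string text,
        const std::vector<std::pair<std::string, std::string>> & replacements) {
    for (const auto & rep : replacements) {
        if (rep.first.empty()) {
            continue; // nothing to search for
        }
        size_t pos = 0;
        while ((pos = text.find(rep.first, pos)) != std::string::npos) {
            text.replace(pos, rep.first.size(), rep.second);
            pos += rep.second.size();
        }
    }
    return text;
}

int main() {
    // Made-up mapping between two chat-template markers; real pairs would
    // come from repeated --spec-replace <target> <draft> arguments.
    std::vector<std::pair<std::string, std::string>> replacements_draft = {
        {"<|assistant|>", "<|im_start|>assistant\n"},
    };
    std::cout << apply_replacements("<|assistant|>Hello", replacements_draft) << "\n";
    return 0;
}
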
@@ -282,6 +282,11 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
         }
     }
+    for (auto & rep : params.replacements_draft) {
+        string_process_escapes(rep.first);
+        string_process_escapes(rep.second);
+    }
     if (!params.kv_overrides.empty()) {
         params.kv_overrides.emplace_back();
         params.kv_overrides.back().key[0] = 0;
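
The loop added here runs string_process_escapes over both halves of every --spec-replace pair, so sequences such as \n typed on the command line become real control characters before any substitution takes place. As a rough sketch of that kind of escape pass (this is not the repository's string_process_escapes implementation, which covers more cases):

#include <cstdio>
#include <string>
#include <utility>

// Sketch only: turn two-character sequences such as "\n", "\t" and "\\"
// into the corresponding single characters.
static void process_escapes_sketch(std::string & s) {
    std::string out;
    out.reserve(s.size());
    for (size_t i = 0; i < s.size(); ++i) {
        if (s[i] == '\\' && i + 1 < s.size()) {
            switch (s[++i]) {
                case 'n':  out += '\n'; break;
                case 't':  out += '\t'; break;
                case '\\': out += '\\'; break;
                default:   out += '\\'; out += s[i]; break; // leave unknown escapes alone
            }
        } else {
            out += s[i];
        }
    }
    s = std::move(out);
}

int main() {
    std::string s = "<|assistant|>\\n"; // as it would arrive from the shell
    process_escapes_sketch(s);
    printf("%s", s.c_str());            // the trailing \n is now a real newline
    return 0;
}
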
@@ -731,6 +736,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         }
         return true;
     }
+    if (arg == "--spec-replace") {
+        CHECK_ARG
+        std::string target = argv[i];
+        CHECK_ARG
+        std::string draft = argv[i];
+        params.replacements_draft.emplace_back(std::move(target), std::move(draft));
+        return true;
+    }
     if (arg == "--cfg-negative-prompt") {
         CHECK_ARG
         sparams.cfg_negative_prompt = argv[i];
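
The handler above consumes two argv entries per --spec-replace occurrence, the target-side string and its draft-side replacement, and stores them in params.replacements_draft. A standalone sketch of the same parsing pattern, with hypothetical names and CHECK_ARG replaced by an explicit bounds check (an illustration, not the commit's code):

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

int main(int argc, char ** argv) {
    std::vector<std::pair<std::string, std::string>> replacements_draft;
    for (int i = 1; i < argc; ++i) {
        if (std::string(argv[i]) == "--spec-replace") {
            // same role as CHECK_ARG above: fail when the two values are missing
            if (i + 2 >= argc) {
                fprintf(stderr, "error: --spec-replace expects <target> <draft>\n");
                return 1;
            }
            std::string target = argv[++i];
            std::string draft  = argv[++i];
            replacements_draft.emplace_back(std::move(target), std::move(draft));
        }
    }
    for (const auto & rep : replacements_draft) {
        printf("replace '%s' -> '%s'\n", rep.first.c_str(), rep.second.c_str());
    }
    return 0;
}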