Mirror of https://github.com/ikawrakow/ik_llama.cpp.git (synced 2026-04-28 02:11:50 +00:00).
Port universal assisted decoding to llama-server (#699)
* port universal assisted decoding to server
* fix calls
* fix LOG_INFO
* fix llama_detokenize call
* use emplace_back
This commit is contained in:
@@ -148,6 +148,8 @@ struct gpt_params {

     std::vector<llama_model_tensor_buft_override> tensor_buft_overrides;

     std::vector<std::pair<int,int>> offload_policy;

+    std::vector<std::pair<std::string, std::string>> replacements_draft; // main to speculative model replacements
+
     bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_lora_adapter_apply)

     std::vector<llama_lora_adapter_info> lora_adapters; // lora adapter path with user defined scale

Reference in New Issue
Block a user