mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-01 12:09:54 +00:00
server: improve speed of speculative decoding (#1119)
* server: improve speed of speculative decoding
  change logs rpc: add recompute spec dec fix
* Fix n_batch_size not set to context size for draft model
---------
Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
@@ -484,7 +484,7 @@ bool server_sent_event(httplib::DataSink& sink, const json& data) {
|
||||
data.dump(-1, ' ', false, json::error_handler_t::replace) +
|
||||
"\n\n"; // required by RFC 8895 - A message is terminated by a blank line (two line terminators in a row).
|
||||
|
||||
LOG_VERBOSE("data stream, to_send: %s", str.c_str());
|
||||
//LOG_VERBOSE("data stream, to_send: %s", str.c_str());
|
||||
|
||||
return sink.write(str.c_str(), str.size());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user