mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-01-26 09:09:50 +00:00
server: improve speed of speculative decoding (#1119)
* server: improve speed of speculative decoding change logs rpc: add recompute spec dec fix * Fix n_batch_size not set to context size for draft model --------- Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
@@ -9,7 +9,7 @@ extern "C" {
|
||||
|
||||
#define RPC_PROTO_MAJOR_VERSION 3
|
||||
#define RPC_PROTO_MINOR_VERSION 5
|
||||
#define RPC_PROTO_PATCH_VERSION 1
|
||||
#define RPC_PROTO_PATCH_VERSION 2
|
||||
#define GGML_RPC_MAX_SERVERS 16
|
||||
|
||||
// backend API
|
||||
|
||||
@@ -1828,6 +1828,16 @@ static void rpc_serve_client(const std::vector<ggml_backend_t>& backends, const
|
||||
}
|
||||
break;
|
||||
}
|
||||
case RPC_CMD_GRAPH_RECOMPUTE: {
|
||||
rpc_msg_graph_recompute_req request;
|
||||
if (!recv_msg(sockfd, &request, sizeof(request))) {
|
||||
return;
|
||||
}
|
||||
if (!server.graph_recompute(request)) {
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case RPC_CMD_GET_DEVICE_MEMORY: {
|
||||
rpc_msg_get_device_memory_req request;
|
||||
if (!recv_msg(sockfd, &request, sizeof(request))) {
|
||||
|
||||
Reference in New Issue
Block a user