server: improve speed of speculative decoding (#1119)

* server: improve speed of speculative decoding
  change logs rpc: add recompute spec dec fix
* Fix n_batch_size not set to context size for draft model

---------

Co-authored-by: firecoperana <firecoperana>
2026-03-06 12:00:29 +00:00 · 2026-01-10 00:01:22 -06:00
parent 6695c6c945
commit c1931663ad
7 changed files with 164 additions and 135 deletions
--- a/ggml/include/ggml-rpc.h
+++ b/ggml/include/ggml-rpc.h
@@ -9,7 +9,7 @@ extern "C" {

 #define RPC_PROTO_MAJOR_VERSION    3
 #define RPC_PROTO_MINOR_VERSION    5
-#define RPC_PROTO_PATCH_VERSION    1
+#define RPC_PROTO_PATCH_VERSION    2
 #define GGML_RPC_MAX_SERVERS       16

 // backend API