RPC: support multiple devices including cpu (#1024)

* RPC support multiple devices

* rpc : update documentation (#16441)

Update the README file to match the newly added functionality of
exposing multiple devices from a single server.

Co-authored-by: Diego Devesa <slarengh@gmail.com>

# Conflicts:
#	examples/rpc/README.md

* Remove memory settings

* rpc : cache and reuse compute graphs (#15405)

Store the last computed graph and reuse it when possible.
Also do not return a response from GRAPH_COMPUTE and assume it always
completes successfully. If this is not the case, the server closes
the connection. This saves us a network round trip to the server.

* Add -cpu to include cpu backend

---------

Co-authored-by: firecoperana <firecoperana>
Co-authored-by: Radoslav Gerganov <rgerganov@gmail.com>
This commit is contained in:
firecoperana
2025-11-30 11:48:02 -06:00
committed by GitHub
parent 1cad1ec1cc
commit 15771072c7
8 changed files with 734 additions and 381 deletions

View File

@@ -999,15 +999,6 @@ GGML_CALL static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, v
GGML_UNUSED(user_data);
}
#ifdef GGML_USE_RPC
// Registry init shim for the RPC backend: forwards to ggml_backend_rpc_init
// with the server endpoint carried in user_data (presumably a "host:port"
// string registered elsewhere — TODO confirm against the registration site).
// NOTE(review): the GGML_UNUSED expansions after the return are the ggml
// convention for marking parameters used; they are intentionally unreachable.
GGML_CALL static ggml_backend_t ggml_backend_reg_rpc_init(const char* params, void* user_data) {
return ggml_backend_rpc_init((const char*)user_data);
GGML_UNUSED(params);
GGML_UNUSED(user_data);
}
#endif
// multi-buffer buffer
struct ggml_backend_multi_buffer_context {
@@ -2159,6 +2150,7 @@ void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
ggml_backend_sched_synchronize(sched);
ggml_backend_sched_split_graph(sched, measure_graph);
@@ -2167,7 +2159,6 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph *
}
ggml_backend_sched_reset(sched);
ggml_backend_sched_synchronize(sched);
return true;
}