RPC: support multiple devices including cpu (#1024)

* RPC support multiple devices

* rpc : update documentation (#16441)

Update the README file to match the newly added functionality of
exposing multiple devices from a single server.

Co-authored-by: Diego Devesa <slarengh@gmail.com>

# Conflicts:
#	examples/rpc/README.md

* Remove memory settings

* rpc : cache and reuse compute graphs (#15405)

Store the last computed graph and reuse it when possible.
Also do not return a response from GRAPH_COMPUTE and assume it always
completes successfully. If this is not the case, the server closes
the connection. This saves us a network round trip to the server.

* Add -cpu to include cpu backend

---------

Co-authored-by: firecoperana <firecoperana>
Co-authored-by: Radoslav Gerganov <rgerganov@gmail.com>
This commit is contained in:
firecoperana
2025-11-30 11:48:02 -06:00
committed by GitHub
parent 1cad1ec1cc
commit 15771072c7
8 changed files with 734 additions and 381 deletions

View File

@@ -999,15 +999,6 @@ GGML_CALL static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, v
GGML_UNUSED(user_data);
}
#ifdef GGML_USE_RPC
// Registry init shim for the RPC backend: forwards to ggml_backend_rpc_init
// with the server endpoint carried in user_data (presumably a "host:port"
// string registered elsewhere — TODO confirm against the registration site).
// NOTE(review): the GGML_UNUSED expansions after the return are the ggml
// convention for marking parameters used; they are intentionally unreachable.
GGML_CALL static ggml_backend_t ggml_backend_reg_rpc_init(const char* params, void* user_data) {
return ggml_backend_rpc_init((const char*)user_data);
GGML_UNUSED(params);
GGML_UNUSED(user_data);
}
#endif
// multi-buffer buffer
struct ggml_backend_multi_buffer_context {
@@ -2159,6 +2150,7 @@ void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
ggml_backend_sched_synchronize(sched);
ggml_backend_sched_split_graph(sched, measure_graph);
@@ -2167,7 +2159,6 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph *
}
ggml_backend_sched_reset(sched);
ggml_backend_sched_synchronize(sched);
return true;
}