Split mode graph: use CUDA graphs (#1177)

* Use CUDA graphs also when there are tensor overrides

* Change graph key

* This seems to work
This commit is contained in:
Kawrakow
2026-01-22 12:38:36 +02:00
committed by GitHub
parent 573e23679d
commit 851fda3509
3 changed files with 35 additions and 15 deletions

View File

@@ -4627,7 +4627,7 @@ struct llama_context * llama_new_context_with_model(
// LLAMA_SPLIT_MODE_LAYER and LLAMA_SPLIT_MODE_GRAPH require a backend for each GPU
auto params = cparams.cuda_params;
std::string new_params;
if (model->split_mode == LLAMA_SPLIT_MODE_GRAPH) {
if (false && model->split_mode == LLAMA_SPLIT_MODE_GRAPH) {
static const std::string extra_string{"graphs=0"};
if (params) new_params = std::string{(const char *)params} + ',';
new_params += extra_string;