Split mode graph: use CUDA graphs (#1177)

* Use CUDA graphs also when there are tensor overrides * Change graph key * This seems to work
2026-03-02 18:10:02 +00:00 · 2026-01-22 12:38:36 +02:00
parent 573e23679d
commit 851fda3509
3 changed files with 35 additions and 15 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -4627,7 +4627,7 @@ struct llama_context * llama_new_context_with_model(
            // LLAMA_SPLIT_MODE_LAYER and LLAMA_SPLIT_MODE_GRAPH require a backend for each GPU
            auto params = cparams.cuda_params;
            std::string new_params;
-            if (model->split_mode == LLAMA_SPLIT_MODE_GRAPH) {
+            if (false && model->split_mode == LLAMA_SPLIT_MODE_GRAPH) {
                static const std::string extra_string{"graphs=0"};
                if (params) new_params = std::string{(const char *)params} + ',';
                new_params += extra_string;