Also set the default MLA mode (mla_attn) to 3 in llama-bench

2026-03-01 17:40:25 +00:00 · 2025-11-13 09:50:23 +02:00
parent 8a8de91a42
commit aba78ceafa
1 changed file with 1 addition and 1 deletion
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -289,7 +289,7 @@ static const cmd_params cmd_params_defaults = {
    /* main_gpu             */ {0},
    /* no_kv_offload        */ {false},
    /* flash_attn           */ {true},
-    /* mla_attn             */ {0},
+    /* mla_attn             */ {3},
    /* attn_max_batch       */ {0},
    /* ser                  */ {{-1,0.0f}},
    /* tensor_split         */ {std::vector<float>(llama_max_devices(), 0.0f)},