Mirror of https://github.com/ikawrakow/ik_llama.cpp.git
Set default MLA to 3 also in llama-bench (#949)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
@@ -289,7 +289,7 @@ static const cmd_params cmd_params_defaults = {
     /* main_gpu       */ {0},
     /* no_kv_offload  */ {false},
     /* flash_attn     */ {true},
-    /* mla_attn       */ {0},
+    /* mla_attn       */ {3},
     /* attn_max_batch */ {0},
     /* ser            */ {{-1,0.0f}},
     /* tensor_split   */ {std::vector<float>(llama_max_devices(), 0.0f)},
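The block above is part of llama-bench's cmd_params_defaults, where each parameter holds a vector of values and the benchmark runs over the combinations of those vectors, so the default vector only matters when the user does not pass that parameter explicitly. The following is a minimal, hypothetical C++ sketch of that pattern (names like bench_defaults are illustrative, not the actual ik_llama.cpp code), showing why changing the default entry from {0} to {3} changes which MLA mode gets benchmarked by default:

#include <cstdio>
#include <vector>

// Illustrative stand-in for llama-bench-style defaults: one vector per parameter.
struct bench_defaults {
    std::vector<int>  mla_attn;     // MLA mode; default now {3} instead of {0}
    std::vector<bool> flash_attn;   // flash attention on/off
};

static const bench_defaults defaults = {
    /* mla_attn   */ {3},
    /* flash_attn */ {true},
};

int main() {
    // Every combination of the parameter vectors would be benchmarked.
    for (int mla : defaults.mla_attn) {
        for (bool fa : defaults.flash_attn) {
            std::printf("run: mla_attn=%d flash_attn=%d\n", mla, (int) fa);
        }
    }
    return 0;
}

In llama-bench, an explicit command-line value replaces the default vector for that parameter, so this change only affects runs where no MLA setting is given.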