Make MLA optional

This commit is contained in:
Iwan Kawrakow
2025-02-06 15:10:06 +02:00
parent 35246c4e75
commit 37c4fbd7f9
5 changed files with 231 additions and 114 deletions

@@ -374,6 +374,7 @@ extern "C" {
bool embeddings; // if true, extract embeddings (together with logits)
bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
bool flash_attn; // whether to use flash attention [EXPERIMENTAL]
bool mla_attn; // whether to use MLA attention [EXPERIMENTAL]
// Abort callback
// if it returns true, execution of llama_decode() will be aborted