Make ooae on by default and add to llama-bench (#842)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2025-10-20 08:32:41 +03:00
committed by GitHub
parent 0c050638b6
commit 36f9601e8d
3 changed files with 31 additions and 5 deletions

View File

@@ -255,7 +255,7 @@ struct gpt_params {
 bool repack_tensors = false; // repack tensors if interleaved variant is available
 bool use_thp = false; // use transparent huge pages (linux only)
 bool validate_quants = false; // if true, check for NaNs while loading the model
-bool only_active_exps = false; // if true, offload only active experts (relevant only for hybrid CPU/GPU)
+bool only_active_exps = true; // if true, offload only active experts (relevant only for hybrid CPU/GPU)
 std::string cache_type_k = "f16"; // KV cache data type for the K
 std::string cache_type_v = "f16"; // KV cache data type for the V