Make ooae on by default and add to llama-bench (#842)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2025-10-20 08:32:41 +03:00
committed by GitHub
parent 0c050638b6
commit 36f9601e8d
3 changed files with 31 additions and 5 deletions

View File

@@ -255,7 +255,7 @@ struct gpt_params {
 bool repack_tensors = false; // repack tensors if interleaved variant is available
 bool use_thp = false; // use transparent huge pages (linux only)
 bool validate_quants = false; // if true, check for NaNs while loading the model
-bool only_active_exps = false; // if true, offload only active experts (relevant only for hybrid CPU/GPU)
+bool only_active_exps = true; // if true, offload only active experts (relevant only for hybrid CPU/GPU)
 std::string cache_type_k = "f16"; // KV cache data type for the K
 std::string cache_type_v = "f16"; // KV cache data type for the V