Mirror of https://github.com/ikawrakow/ik_llama.cpp.git (synced 2026-04-21 06:59:21 +00:00)
Change flash attention and fmoe to be on by default (#863)
* Change fmoe to be on by default
* Change default fmoe also in llama-bench
* Change flash attention to be on by default

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -260,7 +260,7 @@ struct cmd_params {
|
||||
bool verbose;
|
||||
bool warmup;
|
||||
bool repack = false;
|
||||
bool fmoe = false;
|
||||
bool fmoe = true;
|
||||
bool ger = false; // ger = Grouped Expert Routing
|
||||
bool no_fug = false;
|
||||
bool use_thp = false;
|
||||
@@ -285,7 +285,7 @@ static const cmd_params cmd_params_defaults = {
|
||||
/* split_mode */ {LLAMA_SPLIT_MODE_LAYER},
|
||||
/* main_gpu */ {0},
|
||||
/* no_kv_offload */ {false},
|
||||
/* flash_attn */ {false},
|
||||
/* flash_attn */ {true},
|
||||
/* mla_attn */ {0},
|
||||
/* attn_max_batch */ {0},
|
||||
/* ser */ {{-1,0.0f}},
|
||||
@@ -298,7 +298,7 @@ static const cmd_params cmd_params_defaults = {
|
||||
/* verbose */ false,
|
||||
/* warmup */ true,
|
||||
/* repack */ false,
|
||||
/* fmoe */ false,
|
||||
/* fmoe */ true,
|
||||
/* ger */ false,
|
||||
/* no_fug */ false,
|
||||
/* use_thp */ false,
|
||||
@@ -846,7 +846,7 @@ struct cmd_params_instance {
|
||||
bool use_mmap;
|
||||
bool embeddings;
|
||||
bool repack = false;
|
||||
bool fmoe = false;
|
||||
bool fmoe = true;
|
||||
bool ger = false;
|
||||
bool no_fug = false;
|
||||
bool use_thp = false;
|
||||
|
||||
Reference in New Issue
Block a user