Add command line option

This time the option is ON by default; to turn it off,
pass -no-fug or --no-fused-up-gate.
This commit is contained in:
Iwan Kawrakow
2025-08-30 11:56:37 +03:00
parent df066ced5e
commit 3bc7acf1bd
5 changed files with 48 additions and 7 deletions

View File

@@ -419,7 +419,8 @@ extern "C" {
bool flash_attn; // whether to use flash attention [EXPERIMENTAL]
int mla_attn; // whether to use MLA attention [EXPERIMENTAL]
int attn_max_batch; // maximum batch size for attention computations [EXPERIMENTAL]
- bool fused_moe_up_gate; // whether to use fused MoE up/down op [EXPERIMENTAL]
+ bool fused_moe_up_gate; // whether to use fused MoE up/gate op
+ bool fused_up_gate; // whether to use fused up/gate op [EXPERIMENTAL]
int min_experts;
float thresh_experts;