Turn on graph reuse by default

commit bf3ff8ec41
parent 2fe098e938
Author: Iwan Kawrakow
Date:   2025-12-27 07:22:46 +00:00

3 changed files with 7 additions and 2 deletions


@@ -1251,6 +1251,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.graph_reuse = true;
         return true;
     }
+    if (arg == "-no-gr" || arg == "--no-graph-reuse") {
+        params.graph_reuse = false;
+        return true;
+    }
     if (arg == "-ser" || arg == "--smart-expert-reduction") {
         CHECK_ARG
         auto values = string_split_pairs<int,float>(argv[i], ',');
@@ -2131,6 +2135,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     options.push_back({ "*", "-no-mmad, --no-fused-mul-multiadd", "disable fused mul-multi_add (default: %s)", params.fused_mmad? "enabled" : "disabled" });
     //options.push_back({ "*", "-rcache, --rope-cache", "enable RoPE cache (default: %s)", params.rope_cache ? "enabled" : "disabled" });
     options.push_back({ "*", "-gr, --graph-reuse", "enable graph reuse (default: %s)", params.graph_reuse ? "enabled" : "disabled" });
+    options.push_back({ "*", "-no-gr, --no-graph-reuse", "disable graph reuse (default: %s)", !params.graph_reuse ? "enabled" : "disabled" });
     options.push_back({ "*", "-ser, --smart-expert-reduction", "experts reduction (default: %d,%g)", params.min_experts, params.thresh_experts});
     options.push_back({ "*", "-mqkv, --merge-qkv,", "merge Q,K,V (default: %d)", params.merge_qkv});
     options.push_back({ "*", "-khad, --k-cache-hadamard,", "Use Hadamard transform for K-cache (default: %d)", params.k_cache_hadamard});

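With the default flipped, graph reuse is now on unless explicitly disabled; the two hunks above only add the opt-out path, and -gr / --graph-reuse keeps working as before. Below is a minimal sketch of how this surfaces through the common argument parser, assuming the gpt_params / gpt_params_parse helpers from this repo's common code; the driver program itself is hypothetical and for illustration only.

// Hypothetical driver: exercises the new default and the
// -no-gr / --no-graph-reuse opt-out added in the hunk above.
#include <cstdio>
#include "common.h"

int main(int argc, char ** argv) {
    gpt_params params;                      // graph_reuse now defaults to true
    if (!gpt_params_parse(argc, argv, params)) {
        return 1;                           // parser reports the bad argument
    }
    // With no arguments graph reuse stays enabled; passing -no-gr
    // (or --no-graph-reuse) flips params.graph_reuse back to false.
    fprintf(stderr, "graph reuse: %s\n", params.graph_reuse ? "enabled" : "disabled");
    return 0;
}
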

@@ -265,7 +265,7 @@ struct gpt_params {
     bool fused_mmad = true; // fused mul+multi_add op
     bool grouped_expert_routing = false; // if to use grouped expert routing (BailingMoeV2 arch)
     bool rope_cache = false; // if to use RoPE cache (for supported models)
-    bool graph_reuse = false; // if to reuse compute graphs
+    bool graph_reuse = true; // if to reuse compute graphs
     int min_experts = -1;
     float thresh_experts = 0;


@@ -4049,7 +4049,7 @@ struct llama_context_params llama_context_default_params() {
        /*.fused_up_gate       =*/ true,
        /*.fused_mmad          =*/ true,
        /*.rope_cache          =*/ false,
-       /*.graph_reuse         =*/ false,
+       /*.graph_reuse         =*/ true,
        /*.min_experts         =*/ -1,
        /*.thtesh_experts      =*/ 0.0f,
        /*.only_active_experts =*/ false,
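
The same default flip applies on the C API side: llama_context_default_params() now returns graph_reuse = true, so callers that relied on the old behavior have to clear the flag themselves. A minimal sketch, assuming the long-standing llama.h entry points (llama_load_model_from_file, llama_new_context_with_model) used elsewhere in this codebase:

// Sketch only: opting back out of graph reuse through llama_context_params.
#include "llama.h"

int main(int argc, char ** argv) {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_load_model_from_file(argv[1], mparams);
    if (!model) return 1;

    llama_context_params cparams = llama_context_default_params();
    cparams.graph_reuse = false;   // default is now true; set to false to restore the old behavior

    llama_context * ctx = llama_new_context_with_model(model, cparams);
    // ... run inference ...

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}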