Change --n-cpu-moe to not keep expert biases on CPU (#841)

* Change --n-cpu-moe to not keep expert biases on CPU

* Also for --cpu-moe

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2025-10-19 19:03:03 +03:00
committed by GitHub
parent 7a41b3b1f5
commit 0c050638b6

View File

@@ -1152,7 +1152,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
return true; return true;
} }
if (arg == "--cpu-moe" || arg == "-cmoe") { if (arg == "--cpu-moe" || arg == "-cmoe") {
params.tensor_buft_overrides.push_back({strdup("\\.ffn_(up|down|gate)_exps"), ggml_backend_cpu_buffer_type()}); params.tensor_buft_overrides.push_back({strdup("\\.ffn_(up|down|gate)_exps\\.weight"), ggml_backend_cpu_buffer_type()});
return true; return true;
} }
if (arg == "--n-cpu-moe" || arg == "-ncmoe") { if (arg == "--n-cpu-moe" || arg == "-ncmoe") {
@@ -1164,7 +1164,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
return true; return true;
} }
for (int32_t l = 0; l < n_layers; ++l) { for (int32_t l = 0; l < n_layers; ++l) {
std::string pattern = "blk\\." + std::to_string(l) + "\\.(ffn_(up|down|gate)_exps)"; std::string pattern = "blk\\." + std::to_string(l) + "\\.(ffn_(up|down|gate)_exps\\.weight)";
params.tensor_buft_overrides.push_back({strdup(pattern.c_str()), ggml_backend_cpu_buffer_type()}); params.tensor_buft_overrides.push_back({strdup(pattern.c_str()), ggml_backend_cpu_buffer_type()});
} }
return true; return true;