From 0c050638b6dfd3f1ea4f8a7e02ce0b43da251b42 Mon Sep 17 00:00:00 2001 From: Kawrakow Date: Sun, 19 Oct 2025 19:03:03 +0300 Subject: [PATCH] Change --n-cpu-moe to not keep expert biases on CPU (#841) * Change --n-cpu-moe to not keep expert biases on CPU * Also for --cpu-moe --------- Co-authored-by: Iwan Kawrakow --- common/common.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index e7ade95d..d0f38ed3 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1152,7 +1152,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa return true; } if (arg == "--cpu-moe" || arg == "-cmoe") { - params.tensor_buft_overrides.push_back({strdup("\\.ffn_(up|down|gate)_exps"), ggml_backend_cpu_buffer_type()}); + params.tensor_buft_overrides.push_back({strdup("\\.ffn_(up|down|gate)_exps\\.weight"), ggml_backend_cpu_buffer_type()}); return true; } if (arg == "--n-cpu-moe" || arg == "-ncmoe") { @@ -1164,7 +1164,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa return true; } for (int32_t l = 0; l < n_layers; ++l) { - std::string pattern = "blk\\." + std::to_string(l) + "\\.(ffn_(up|down|gate)_exps)"; + std::string pattern = "blk\\." + std::to_string(l) + "\\.(ffn_(up|down|gate)_exps\\.weight)"; params.tensor_buft_overrides.push_back({strdup(pattern.c_str()), ggml_backend_cpu_buffer_type()}); } return true;