Split mode graph for qwen3moe

2026-03-06 20:10:08 +00:00 · 2025-12-01 11:56:05 +00:00
parent 63d0389e18
commit c51968b6d8
4 changed files with 73 additions and 44 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -1726,6 +1726,7 @@ static void ggml_backend_add_from_device(llama_context* ctx, ggml_backend_t back
 static bool is_model_split_supported(const llama_model & model) {
    static std::unordered_set<llm_arch> k_supported = {
        LLM_ARCH_LLAMA,
+        LLM_ARCH_QWEN3MOE,
        LLM_ARCH_GLM4_MOE,
    };
    auto it =  k_supported.find(model.arch);