diff --git a/src/llama.cpp b/src/llama.cpp
index c0434018..4f608092 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -108,6 +108,7 @@
 #include
 #include
 #include
+#include <unordered_set>
 #include
 #include
 #include
@@ -1723,6 +1724,15 @@ static void ggml_backend_add_from_device(llama_context* ctx, ggml_backend_t back
     }
 }
 
+static bool is_model_split_supported(const llama_model & model) {
+    static std::unordered_set<llm_arch> k_supported = {
+        LLM_ARCH_LLAMA,
+        LLM_ARCH_GLM4_MOE,
+    };
+    auto it = k_supported.find(model.arch);
+    return it != k_supported.end();
+}
+
 // Returns false if cancelled by progress_callback
 static bool llm_load_tensors(
     llama_model_loader & ml,
@@ -1740,6 +1750,16 @@ static bool llm_load_tensors(
 
     auto & hparams = model.hparams;
 
+    if (split_mode == LLAMA_SPLIT_MODE_GRAPH) {
+        if (!is_model_split_supported(model)) {
+            LLAMA_LOG_WARN("\n=======================================================\n");
+            LLAMA_LOG_WARN("Split mode 'graph' is not supported for this model\n");
+            LLAMA_LOG_WARN(" => changing split mode to 'layer'\n");
+            LLAMA_LOG_WARN("=======================================================\n\n");
+            split_mode = LLAMA_SPLIT_MODE_LAYER;
+        }
+    }
+
     model.split_mode = split_mode;
     model.main_gpu = main_gpu;
     model.n_gpu_layers = n_gpu_layers;
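
For reference, a minimal standalone sketch of the pattern the patch applies: an arch allow-list checked once at load time, with a warning and graceful fallback instead of a hard failure. The enums and names below (arch_t, split_mode_t, is_split_supported) are hypothetical stand-ins for llama.cpp's llm_arch and LLAMA_SPLIT_MODE_* values, not part of the patch:

// Illustrative only; compile with -std=c++14 or later (std::hash for enums).
#include <cstdio>
#include <unordered_set>

enum arch_t       { ARCH_LLAMA, ARCH_GLM4_MOE, ARCH_OTHER };
enum split_mode_t { SPLIT_MODE_LAYER, SPLIT_MODE_GRAPH };

static bool is_split_supported(arch_t arch) {
    // Keep the supported archs in one static set so enabling a new
    // arch is a one-line change, mirroring k_supported in the patch.
    static const std::unordered_set<arch_t> k_supported = {
        ARCH_LLAMA,
        ARCH_GLM4_MOE,
    };
    return k_supported.count(arch) > 0;
}

int main() {
    arch_t       arch       = ARCH_OTHER;
    split_mode_t split_mode = SPLIT_MODE_GRAPH;

    // Downgrade to layer split rather than aborting the model load.
    if (split_mode == SPLIT_MODE_GRAPH && !is_split_supported(arch)) {
        fprintf(stderr, "split mode 'graph' not supported for this arch => using 'layer'\n");
        split_mode = SPLIT_MODE_LAYER;
    }
    return split_mode == SPLIT_MODE_LAYER ? 0 : 1;
}

The point of the design is that the check runs once in llm_load_tensors before split_mode is copied into the model, so every later consumer of model.split_mode sees the already-corrected value.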