mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-08 15:30:15 +00:00
Split mode "graph" for GPT-OSS (#1118)
* Split mode "graph" for GPT-OSS
* Force split_mode_f16 to false

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -1734,6 +1734,7 @@ static bool is_model_split_supported(const llama_model & model) {
|
||||
LLM_ARCH_QWEN3,
|
||||
LLM_ARCH_QWEN3VL,
|
||||
LLM_ARCH_HUNYUAN_MOE,
|
||||
LLM_ARCH_OPENAI_MOE,
|
||||
};
|
||||
auto it = k_supported.find(model.arch);
|
||||
return it != k_supported.end();
|
||||
@@ -4432,6 +4433,15 @@ struct llama_context * llama_new_context_with_model(
|
||||
//LLAMA_LOG_WARN("=====================================================================\n");
|
||||
cparams.mla_attn = 0;
|
||||
}
|
||||
if (model->arch == LLM_ARCH_OPENAI_MOE && model->split_mode == LLAMA_SPLIT_MODE_GRAPH) {
|
||||
if (cparams.split_mode_f16) {
|
||||
LLAMA_LOG_WARN("=====================================================================\n");
|
||||
LLAMA_LOG_WARN("GPT-OSS with split mode graph requires f32 precision\n");
|
||||
LLAMA_LOG_WARN(" => changing cparams.split_mode_f16 to 'false'\n");
|
||||
LLAMA_LOG_WARN("=====================================================================\n");
|
||||
cparams.split_mode_f16 = false;
|
||||
}
|
||||
}
|
||||
|
||||
LLAMA_LOG_INFO("%s: n_ctx = %u\n", __func__, cparams.n_ctx);
|
||||
LLAMA_LOG_INFO("%s: n_batch = %u\n", __func__, cparams.n_batch);
|
||||
|
||||
Reference in New Issue
Block a user