mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-06 12:00:29 +00:00
Use standard attention for Ministral3 (#1032)
This required adding the "temperature scaling" to the standard attention implementation, but in this way split mode "graph" is automatically supported. Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -1728,6 +1728,7 @@ static bool is_model_split_supported(const llama_model & model) {
|
||||
LLM_ARCH_LLAMA,
|
||||
LLM_ARCH_QWEN3MOE,
|
||||
LLM_ARCH_GLM4_MOE,
|
||||
LLM_ARCH_MISTRAL3,
|
||||
};
|
||||
auto it = k_supported.find(model.arch);
|
||||
return it != k_supported.end();
|
||||
|
||||
Reference in New Issue
Block a user