mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-23 06:34:13 +00:00
Mimo-V2-Flash support (#1096)
* Mimo-2 support * Fix bug for head sizes not being the same It still does not solve the Mimo-2 quantized cache issue. * Fix quantized cache * Minor --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -4905,6 +4905,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
|
||||
case LLM_ARCH_OPENAI_MOE:
|
||||
case LLM_ARCH_BAILINGMOE2:
|
||||
case LLM_ARCH_MINIMAX_M2:
|
||||
case LLM_ARCH_MIMO2:
|
||||
return LLAMA_ROPE_TYPE_NEOX;
|
||||
|
||||
case LLM_ARCH_QWEN2VL:
|
||||
|
||||
Reference in New Issue
Block a user