From a29f62fc50cf37784d127b91bb1d8adb6d8ec31a Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Wed, 7 Jan 2026 07:36:00 +0000 Subject: [PATCH] Enable up to 4 GPUs for Mimo2-Flash --- src/llama.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/llama.cpp b/src/llama.cpp index aa66afac..5b05ea72 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -1764,11 +1764,12 @@ static bool llm_load_tensors( LLAMA_LOG_WARN("=======================================================\n\n"); split_mode = LLAMA_SPLIT_MODE_LAYER; } else { - if (model.arch == LLM_ARCH_MIMO2 && model.devices.size() > 2 && max_gpu != 2) { + if (model.arch == LLM_ARCH_MIMO2 && model.devices.size() > 4 && (max_gpu == 0 || max_gpu > 4)) { LLAMA_LOG_WARN("\n================================================================\n"); - LLAMA_LOG_WARN("Split mode 'graph' for Mimo2 does not work with more than 2 GPUs\n"); - LLAMA_LOG_WARN(" => setting max_gpu to 2\n"); + LLAMA_LOG_WARN("Split mode 'graph' for Mimo2 does not work with more than 4 GPUs\n"); + LLAMA_LOG_WARN(" => setting max_gpu to 4\n"); LLAMA_LOG_WARN("================================================================\n\n"); + max_gpu = 4; } } }