diff --git a/src/llama-model.h b/src/llama-model.h index e256a8a7..38ab6974 100644 --- a/src/llama-model.h +++ b/src/llama-model.h @@ -430,8 +430,8 @@ struct llama_model { if (arch == LLM_ARCH_QWEN3NEXT || arch == LLM_ARCH_QWEN35MOE) { return std::max(n_tokens * 40, 32u * n_tensors); } - return std::max(1024, 8*n_tensors); - //return 65536 * 2; + //return std::max(1024, 8*n_tensors); + return 65536; } bool has_tensor_overrides() const {