From c4c266847fc2ef14627646323db502e3bbb653ae Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Tue, 2 Dec 2025 08:18:55 +0000
Subject: [PATCH] Slightly better graph split strategy

---
 src/llama-build-context.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/llama-build-context.cpp b/src/llama-build-context.cpp
index 123fc185..23ef445f 100644
--- a/src/llama-build-context.cpp
+++ b/src/llama-build-context.cpp
@@ -1133,7 +1133,10 @@ llm_expert_gating_func_type gating_op,
             results[id] = shared_out;
         }
         cur = ggml_add(ctx, results[0], results[1]);
-        cur->op_params[0] = 0xff;
+        if (cur->ne[1] > 32) {
+            // Force a graph split
+            cur->op_params[0] = 0xff;
+        }
         cb(cur, "ffn_shared_combined", il);
         for (int id = 2; id < int(results.size()); ++id) {
             cur = ggml_add(ctx, cur, results[id]);