Fuse sum_rows and div with topk-moe (#984)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2025-11-19 13:44:09 +01:00
committed by GitHub
parent 047a519771
commit d764edd652
3 changed files with 45 additions and 21 deletions

View File

@@ -865,10 +865,6 @@ llm_expert_gating_func_type gating_op,
cb(weights, "ffn_moe_weights_softmax", il);
}
-if (graph) {
-ggml_build_forward_expand(graph, weights);
-}
if (norm_w) {
weights = ggml_reshape_2d(ctx, weights, n_expert_used, n_tokens);
@@ -890,6 +886,10 @@ llm_expert_gating_func_type gating_op,
cb(weights, "ffn_moe_weights_scaled", il);
}
+if (graph) {
+ggml_build_forward_expand(graph, weights);
+}
cur = ggml_reshape_3d(ctx, cur, n_embd, 1, n_tokens);
if (weight_before_ffn) {