Fuse topk+view+get_rows+reshape+softmax (CPU)

This commit is contained in:
Iwan Kawrakow
2025-10-19 11:45:10 +03:00
parent 18d9f4fc4d
commit c8ed454564
4 changed files with 61 additions and 5 deletions

View File

@@ -836,10 +836,6 @@ llm_expert_gating_func_type gating_op,
ggml_reshape_3d(ctx, probs, 1, n_expert, n_tokens), selected_experts); // [1, n_expert_used, n_tokens]
cb(weights, "ffn_moe_weights", il);
-if (graph) {
-ggml_build_forward_expand(graph, weights);
-}
if (gating_op == LLM_EXPERT_GATING_FUNC_TYPE_SOFTMAX_WEIGHT) {
weights = ggml_reshape_2d(ctx, weights, n_expert_used, n_tokens);
weights = ggml_soft_max(ctx, weights); // [n_expert_used, n_tokens]
@@ -847,6 +843,10 @@ llm_expert_gating_func_type gating_op,
cb(weights, "ffn_moe_weights_softmax", il);
}
+if (graph) {
+ggml_build_forward_expand(graph, weights);
+}
if (norm_w) {
weights = ggml_reshape_2d(ctx, weights, n_expert_used, n_tokens);