Mirror of https://github.com/ikawrakow/ik_llama.cpp.git (synced 2026-03-12 06:50:08 +00:00)
Fuse topk+view+get_rows+reshape+softmax (CPU)
This commit is contained in:
@@ -836,10 +836,6 @@ llm_expert_gating_func_type gating_op,
 ggml_reshape_3d(ctx, probs, 1, n_expert, n_tokens), selected_experts); // [1, n_expert_used, n_tokens]
 cb(weights, "ffn_moe_weights", il);

-if (graph) {
-    ggml_build_forward_expand(graph, weights);
-}
-
 if (gating_op == LLM_EXPERT_GATING_FUNC_TYPE_SOFTMAX_WEIGHT) {
     weights = ggml_reshape_2d(ctx, weights, n_expert_used, n_tokens);
     weights = ggml_soft_max(ctx, weights); // [n_expert_used, n_tokens]
@@ -847,6 +843,10 @@ llm_expert_gating_func_type gating_op,
     cb(weights, "ffn_moe_weights_softmax", il);
 }

+if (graph) {
+    ggml_build_forward_expand(graph, weights);
+}
+
 if (norm_w) {
     weights = ggml_reshape_2d(ctx, weights, n_expert_used, n_tokens);

Reference in New Issue
Block a user