mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-08 15:30:15 +00:00
Fuse sum_rows and div with topk-moe (#984)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -865,10 +865,6 @@ llm_expert_gating_func_type gating_op,
|
||||
cb(weights, "ffn_moe_weights_softmax", il);
|
||||
}
|
||||
|
||||
if (graph) {
|
||||
ggml_build_forward_expand(graph, weights);
|
||||
}
|
||||
|
||||
if (norm_w) {
|
||||
weights = ggml_reshape_2d(ctx, weights, n_expert_used, n_tokens);
|
||||
|
||||
@@ -890,6 +886,10 @@ llm_expert_gating_func_type gating_op,
|
||||
cb(weights, "ffn_moe_weights_scaled", il);
|
||||
}
|
||||
|
||||
if (graph) {
|
||||
ggml_build_forward_expand(graph, weights);
|
||||
}
|
||||
|
||||
cur = ggml_reshape_3d(ctx, cur, n_embd, 1, n_tokens);
|
||||
|
||||
if (weight_before_ffn) {
|
||||
|
||||
Reference in New Issue
Block a user