Fuse sigmoid+add+grouped_topk+get_rows (CPU)

This commit is contained in:
Iwan Kawrakow
2025-10-18 10:09:32 +03:00
parent 747f411da5
commit 8f5f93e6b1
4 changed files with 131 additions and 3 deletions

View File

@@ -827,8 +827,9 @@ llm_expert_gating_func_type gating_op,
auto& hparams = lctx.model.hparams;
selected_experts = ggml_grouped_topk(ctx, selection_probs, hparams.n_expert_groups, hparams.n_group_used, 2, n_expert_used);
} else {
-selected_experts = ggml_top_k_thresh(ctx, selection_probs, n_expert_used,
-lctx.cparams.min_experts, lctx.cparams.thresh_experts); // [n_expert_used, n_tokens]
+//selected_experts = ggml_top_k_thresh(ctx, selection_probs, n_expert_used,
+// lctx.cparams.min_experts, lctx.cparams.thresh_experts); // [n_expert_used, n_tokens]
+selected_experts = ggml_top_k(ctx, selection_probs, n_expert_used); // [n_expert_used, n_tokens]
}
cb(selected_experts, "ffn_moe_topk", il);
ggml_tensor * weights = ggml_get_rows(ctx,