mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-02 01:50:01 +00:00
Fuse sigmoid+add+grouped_topk+get_rows (CPU)
This commit is contained in:
@@ -827,8 +827,9 @@ llm_expert_gating_func_type gating_op,
|
||||
auto& hparams = lctx.model.hparams;
|
||||
selected_experts = ggml_grouped_topk(ctx, selection_probs, hparams.n_expert_groups, hparams.n_group_used, 2, n_expert_used);
|
||||
} else {
|
||||
-selected_experts = ggml_top_k_thresh(ctx, selection_probs, n_expert_used,
|
||||
-lctx.cparams.min_experts, lctx.cparams.thresh_experts); // [n_expert_used, n_tokens]
|
||||
+//selected_experts = ggml_top_k_thresh(ctx, selection_probs, n_expert_used,
|
||||
+// lctx.cparams.min_experts, lctx.cparams.thresh_experts); // [n_expert_used, n_tokens]
|
||||
+selected_experts = ggml_top_k(ctx, selection_probs, n_expert_used); // [n_expert_used, n_tokens]
|
||||
}
|
||||
cb(selected_experts, "ffn_moe_topk", il);
|
||||
ggml_tensor * weights = ggml_get_rows(ctx,
|
||||
|
||||
Reference in New Issue
Block a user