mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-05 03:20:00 +00:00
Adding fused mul+multi_add + CPU implementation
This commit is contained in:
@@ -949,6 +949,7 @@ llm_expert_gating_func_type gating_op,
|
||||
}
|
||||
experts = ggml_mul(ctx, experts, weights);
|
||||
cb(experts, "ffn_moe_weighted", il);
|
||||
return experts;
|
||||
}
|
||||
|
||||
if (n_expert_used == 1) {
|
||||
|
||||
Reference in New Issue
Block a user