mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-05-01 03:41:53 +00:00
Adding fused mul+multi_add + CPU implementation
This commit is contained in:
@@ -949,6 +949,7 @@ llm_expert_gating_func_type gating_op,
|
|||||||
}
|
}
|
||||||
experts = ggml_mul(ctx, experts, weights);
|
experts = ggml_mul(ctx, experts, weights);
|
||||||
cb(experts, "ffn_moe_weighted", il);
|
cb(experts, "ffn_moe_weighted", il);
|
||||||
|
return experts;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (n_expert_used == 1) {
|
if (n_expert_used == 1) {
|
||||||
|
|||||||
Reference in New Issue
Block a user