mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-27 00:24:11 +00:00
WIP - not working
This commit is contained in:
@@ -910,7 +910,8 @@ ggml_tensor * llm_build_context::llm_build_moe_ffn(
|
||||
bool scale_w,
|
||||
float w_scale,
|
||||
llm_expert_gating_func_type gating_op,
|
||||
const llm_build_cb & cb, int il, ggml_cgraph * graph, bool add_input) {
|
||||
const llm_build_cb & cb, int il, ggml_cgraph * graph, bool add_input,
|
||||
ggml_tensor * up_gate_exps, ggml_tensor * up_gate_exps_b) {
|
||||
|
||||
auto input = cur;
|
||||
|
||||
@@ -1025,6 +1026,19 @@ llm_expert_gating_func_type gating_op,
|
||||
bool can_use_fmoe = type_op == LLM_FFN_SILU || type_op == LLM_FFN_GELU || type_op == LLM_FFN_SWIGLU_OAI_MOE;
|
||||
|
||||
ggml_tensor * par;
|
||||
if (can_use_fmoe && up_gate_exps) {
|
||||
if (up_gate_exps_b) {
|
||||
par = ggml_moe_up_gate_ext(ctx, up_gate_exps, nullptr, cur, selected_experts, up_gate_exps_b, nullptr,
|
||||
type_op == LLM_FFN_SILU ? GGML_UNARY_OP_SILU :
|
||||
type_op == LLM_FFN_GELU ? GGML_UNARY_OP_GELU : GGML_UNARY_OP_SWIGLU_OAI);
|
||||
} else {
|
||||
GGML_ASSERT(type_op != LLM_FFN_SWIGLU_OAI_MOE);
|
||||
par = ggml_moe_up_gate(ctx, up_gate_exps, nullptr, cur, selected_experts,
|
||||
type_op == LLM_FFN_SILU ? GGML_UNARY_OP_SILU : GGML_UNARY_OP_GELU);
|
||||
}
|
||||
} else {
|
||||
GGML_ASSERT(!up_gate_exps && !up_gate_exps_b);
|
||||
|
||||
if (can_use_fmoe && lctx.cparams.fused_moe_up_gate && up_exps->type == gate_exps->type) {
|
||||
if (up_exps_b || gate_exps_b) {
|
||||
par = ggml_moe_up_gate_ext(ctx, up_exps, gate_exps, cur, selected_experts, up_exps_b, gate_exps_b,
|
||||
@@ -1069,6 +1083,7 @@ llm_expert_gating_func_type gating_op,
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
cb(par, "ffn_moe_gate_par", il);
|
||||
|
||||
@@ -1130,7 +1145,8 @@ ggml_tensor * llm_build_context::llm_build_std_moe_ffn(ggml_context * ctx, llama
|
||||
float w_scale,
|
||||
llm_expert_gating_func_type gating_op,
|
||||
llm_ffn_op_type type_op_shexp,
|
||||
const llm_build_cb & cb, int il, ggml_cgraph * graph, bool add_input) {
|
||||
const llm_build_cb & cb, int il, ggml_cgraph * graph, bool add_input,
|
||||
ggml_tensor * up_gate_exps, ggml_tensor * up_gate_exps_b) {
|
||||
|
||||
auto split_up_exps = (ggml_split_tensor_t *)up_exps->extra;
|
||||
auto split_gate_exps = (ggml_split_tensor_t *)gate_exps->extra;
|
||||
@@ -1164,7 +1180,7 @@ llm_expert_gating_func_type gating_op,
|
||||
the_exp_probs_b,
|
||||
n_expert, n_expert_used,
|
||||
type_op, norm_w, scale_w, w_scale,
|
||||
gating_op, cb, il, graph, false);
|
||||
gating_op, cb, il, graph, false, up_gate_exps, up_gate_exps_b);
|
||||
cb(routed_out, "routed_out", il);
|
||||
if (add_input) {
|
||||
routed_out = ggml_add(ctx, routed_out, input);
|
||||
@@ -8410,7 +8426,8 @@ ggml_cgraph * llm_build_context::build_openai_moe() {
|
||||
n_expert, n_expert_used,
|
||||
LLM_FFN_SWIGLU_OAI_MOE, false, false, 0.0f,
|
||||
LLM_EXPERT_GATING_FUNC_TYPE_SOFTMAX_WEIGHT,
|
||||
LLM_FFN_SWIGLU_OAI_MOE, cb, il, gf, true);
|
||||
LLM_FFN_SWIGLU_OAI_MOE, cb, il, gf, true,
|
||||
model.layers[il].ffn_up_gate_exps, model.layers[il].ffn_up_gate_exps_b);
|
||||
|
||||
cur = lctx.cvec.apply_to(ctx0, cur, il);
|
||||
cb(cur, "l_out", il);
|
||||
|
||||
Reference in New Issue
Block a user