mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-05-01 03:41:53 +00:00
WIP: fix sm layer (MoE)
This commit is contained in:
@@ -1121,10 +1121,10 @@ llm_expert_gating_func_type gating_op,
|
|||||||
if (ffn_norm) {
|
if (ffn_norm) {
|
||||||
auto the_ffn_norm = ffn_norm->extra ? ((ggml_split_tensor_t *)ffn_norm->extra)->splits[lctx.model.main_gpu] : ffn_norm;
|
auto the_ffn_norm = ffn_norm->extra ? ((ggml_split_tensor_t *)ffn_norm->extra)->splits[lctx.model.main_gpu] : ffn_norm;
|
||||||
GGML_ASSERT(the_ffn_norm);
|
GGML_ASSERT(the_ffn_norm);
|
||||||
cur = llm_build_norm(ctx, input, lctx.model.hparams, the_ffn_norm, nullptr, LLM_NORM_RMS, cb, il);
|
cur = llm_build_norm(ctx, cur, lctx.model.hparams, the_ffn_norm, nullptr, LLM_NORM_RMS, cb, il);
|
||||||
cb(cur, "ffn_inp_normed", il);
|
cb(cur, "ffn_inp_normed", il);
|
||||||
}
|
}
|
||||||
else if (cur->type != GGML_TYPE_F32) {
|
if (cur->type != GGML_TYPE_F32) {
|
||||||
cur = ggml_cast(ctx, cur, GGML_TYPE_F32);
|
cur = ggml_cast(ctx, cur, GGML_TYPE_F32);
|
||||||
}
|
}
|
||||||
auto the_gate_inp = gate_inp->extra ? ((ggml_split_tensor_t *)gate_inp->extra)->splits[lctx.model.main_gpu] : gate_inp;
|
auto the_gate_inp = gate_inp->extra ? ((ggml_split_tensor_t *)gate_inp->extra)->splits[lctx.model.main_gpu] : gate_inp;
|
||||||
@@ -1139,8 +1139,12 @@ llm_expert_gating_func_type gating_op,
|
|||||||
the_exp_probs_b,
|
the_exp_probs_b,
|
||||||
n_expert, n_expert_used,
|
n_expert, n_expert_used,
|
||||||
type_op, norm_w, scale_w, w_scale,
|
type_op, norm_w, scale_w, w_scale,
|
||||||
gating_op, cb, il, graph, add_input);
|
gating_op, cb, il, graph, false);
|
||||||
cb(routed_out, "routed_out", il);
|
cb(routed_out, "routed_out", il);
|
||||||
|
if (add_input) {
|
||||||
|
routed_out = ggml_add(ctx, routed_out, input);
|
||||||
|
cb(routed_out, "routed_out_with_inp", il);
|
||||||
|
}
|
||||||
ggml_build_forward_expand(graph, routed_out);
|
ggml_build_forward_expand(graph, routed_out);
|
||||||
|
|
||||||
if (up_shexp && gate_shexp && down_shexp) {
|
if (up_shexp && gate_shexp && down_shexp) {
|
||||||
|
|||||||
Reference in New Issue
Block a user