mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-30 19:31:48 +00:00
Fused rms_norm WIP
This commit is contained in:
@@ -2616,6 +2616,7 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||||||
GGML_ASSERT(ne00 % 4 == 0);
|
GGML_ASSERT(ne00 % 4 == 0);
|
||||||
GGML_ASSERT(ggml_is_contiguous_1(src0));
|
GGML_ASSERT(ggml_is_contiguous_1(src0));
|
||||||
GGML_ASSERT(src1->ne[0] == src0->ne[0]);
|
GGML_ASSERT(src1->ne[0] == src0->ne[0]);
|
||||||
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
||||||
GGML_ASSERT(ggml_nrows(src1) == 1);
|
GGML_ASSERT(ggml_nrows(src1) == 1);
|
||||||
|
|
||||||
float eps;
|
float eps;
|
||||||
|
|||||||
@@ -7988,9 +7988,12 @@ static struct ggml_tensor * llm_build_norm(
|
|||||||
const llm_build_cb & cb,
|
const llm_build_cb & cb,
|
||||||
int il, float scale_eps = 1) {
|
int il, float scale_eps = 1) {
|
||||||
|
|
||||||
if (type == LLM_NORM_RMS && !mb) {
|
if (type == LLM_NORM_RMS && mw) {
|
||||||
cur = ggml_fused_rms_norm(ctx, cur, mw, scale_eps * hparams.f_norm_rms_eps);
|
cur = ggml_fused_rms_norm(ctx, cur, mw, scale_eps * hparams.f_norm_rms_eps);
|
||||||
cb(cur, "fused_norm", il);
|
if (mb) {
|
||||||
|
cb(cur, "fused_norm", il);
|
||||||
|
cur = ggml_add(ctx, cur, mb);
|
||||||
|
}
|
||||||
return cur;
|
return cur;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user