Adding fused rms_norm (#42)

* Fused rms_norm: works on the CPU

* Fused rms_norm WIP

* Fused rms_norm WIP

* Fused rms_norm WIP

* Fused rms_norm WIP

* Fused rms_norm WIP

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2024-09-08 10:19:21 +03:00
committed by GitHub
parent 18f5bb47d8
commit d5aa49b93b
8 changed files with 326 additions and 2 deletions

View File

@@ -7987,6 +7987,16 @@ static struct ggml_tensor * llm_build_norm(
llm_norm_type type,
const llm_build_cb & cb,
int il, float scale_eps = 1) {
if (type == LLM_NORM_RMS && mw) {
cur = ggml_fused_rms_norm(ctx, cur, mw, scale_eps * hparams.f_norm_rms_eps);
if (mb) {
cb(cur, "fused_norm", il);
cur = ggml_add(ctx, cur, mb);
}
return cur;
}
switch (type) {
case LLM_NORM: cur = ggml_norm (ctx, cur, hparams.f_norm_eps); break;
case LLM_NORM_RMS: cur = ggml_rms_norm(ctx, cur, scale_eps * hparams.f_norm_rms_eps); break;