Adding fused rms_norm (#42)

* Fused rms_norm: works on the CPU

* Fused rms_norm WIP

* Fused rms_norm WIP

* Fused rms_norm WIP

* Fused rms_norm WIP

* Fused rms_norm WIP

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2024-09-08 10:19:21 +03:00
committed by GitHub
parent 18f5bb47d8
commit d5aa49b93b
8 changed files with 326 additions and 2 deletions

View File

@@ -480,6 +480,7 @@ extern "C" {
GGML_OP_RMS_NORM,
GGML_OP_RMS_NORM_BACK,
GGML_OP_GROUP_NORM,
GGML_OP_FUSED_RMS_NORM,
GGML_OP_MUL_MAT,
GGML_OP_MUL_MAT_ID,
@@ -1159,6 +1160,18 @@ extern "C" {
struct ggml_tensor * a,
float eps);
// fused RMS norm: takes the tensor a, a second tensor operand b, and eps,
// and returns a tensor (associated op in this change: GGML_OP_FUSED_RMS_NORM)
// NOTE(review): presumably equivalent to an RMS norm of a (with eps) followed by
// an element-wise multiply with b, done as a single op - confirm against the implementation
// NOTE(review): shape/type requirements on b are not visible here - TODO confirm
GGML_API struct ggml_tensor * ggml_fused_rms_norm(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
float eps);
// in-place counterpart of ggml_fused_rms_norm; same parameters (ctx, a, b, eps)
// NOTE(review): judging by the name, the result presumably reuses a's data instead of
// allocating a new tensor - confirm against the implementation
GGML_API struct ggml_tensor * ggml_fused_rms_norm_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
float eps);
// group normalize along ne0*ne1*n_groups
// used in stable-diffusion
GGML_API struct ggml_tensor * ggml_group_norm(