Fused norm (#1086)

* Add fused_norm — same idea as fused_rms_norm

* Avoid computing the attention reduce op for cohere2

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2025-12-24 15:22:43 +01:00
committed by GitHub
parent 5e64235d4c
commit ada5cc1523
7 changed files with 273 additions and 29 deletions

View File

@@ -691,6 +691,7 @@ extern "C" {
GGML_OP_REDUCE,
GGML_OP_FAKE_CPY,
GGML_OP_FUSED_NORM,
GGML_OP_COUNT,
};
@@ -1487,6 +1488,18 @@ extern "C" {
struct ggml_tensor * b,
float eps);
// Fused normalization of a with element-wise scaling by b, in a single op.
// NOTE(review): semantics inferred from the fused_rms_norm analogue named in
// the commit message (normalize a with epsilon eps, then multiply by b) —
// confirm against the backend implementations of GGML_OP_FUSED_NORM.
GGML_API struct ggml_tensor * ggml_fused_norm(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
float eps);
// In-place variant; presumably returns a view writing into a's buffer,
// following the convention of the other *_inplace ops — verify.
GGML_API struct ggml_tensor * ggml_fused_norm_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
float eps);
// group normalize along ne0*ne1*n_groups
// used in stable-diffusion
GGML_API struct ggml_tensor * ggml_group_norm(