Fused norm (#1086)

* Add fused_norm — same idea as fused_rms_norm

* Avoid computing the attention reduce op for cohere2

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2025-12-24 15:22:43 +01:00
committed by GitHub
parent 5e64235d4c
commit ada5cc1523
7 changed files with 273 additions and 29 deletions

View File

@@ -691,6 +691,7 @@ extern "C" {
GGML_OP_REDUCE,
GGML_OP_FAKE_CPY,
GGML_OP_FUSED_NORM,
GGML_OP_COUNT,
};
@@ -1487,6 +1488,18 @@ extern "C" {
struct ggml_tensor * b,
float eps);
// Fused normalization of a with element-wise scaling by b, in a single op.
// NOTE(review): semantics inferred from the fused_rms_norm analogue named in
// the commit message (normalize a with epsilon eps, then multiply by b) —
// confirm against the backend implementations of GGML_OP_FUSED_NORM.
GGML_API struct ggml_tensor * ggml_fused_norm(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
float eps);
// In-place variant; presumably returns a view writing into a's buffer,
// following the convention of the other *_inplace ops — verify.
GGML_API struct ggml_tensor * ggml_fused_norm_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
float eps);
// group normalize along ne0*ne1*n_groups
// used in stable-diffusion
GGML_API struct ggml_tensor * ggml_group_norm(