Adding fused y*unary(x) op

2026-02-18 20:30:11 +00:00 · 2024-09-30 08:29:34 +03:00
parent cce49832c1
commit 6ef4f28aae
3 changed files with 144 additions and 2 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -8083,6 +8083,13 @@ static struct ggml_tensor * llm_build_ffn(
        cur = tmp;
    }

+    if (type_gate == LLM_FFN_PAR &&
+       (type_op == LLM_FFN_SILU || type_op == LLM_FFN_RELU || (type_op == LLM_FFN_GELU && !act_scales))) {
+        cur = ggml_fused_mul_unary(ctx, cur, tmp, type_op == LLM_FFN_SILU ? GGML_UNARY_OP_SILU :
+                                                  type_op == LLM_FFN_RELU ? GGML_UNARY_OP_RELU : GGML_UNARY_OP_GELU);
+    }
+    else {
+
    switch (type_op) {
        case LLM_FFN_SILU:
            {
@@ -8122,6 +8129,7 @@ static struct ggml_tensor * llm_build_ffn(
        cur = ggml_mul(ctx, cur, tmp);
        cb(cur, "ffn_gate_par", il);
    }
+    }

    if (down) {
        cur = llm_build_lora_mm(lctx, ctx, down, cur);