From ee9b05241450348678bcfdfa74dd785dd35ee414 Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Sat, 26 Oct 2024 18:33:42 +0300 Subject: [PATCH] Bitnet: use the fused mul-silu in the FFN network I had forgotten that build_bitnet() does not use the standard llm_build_ffn function, so the fused mul-silu didn't get used for Bitnet when I added it to llm_build_ffn. This gives us another ~1% speedup for TG-128. --- src/llama.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/llama.cpp b/src/llama.cpp index 27ba5d2f..1384123a 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -13399,12 +13399,7 @@ struct llm_build_context { cb(cur, "ffn_gate", il); - - // combine this with the above scale into ggml_scaled_silu - cur = ggml_silu(ctx0, cur); - cb(cur, "ffn_silu", il); - - cur = ggml_mul(ctx0, cur, tmp); + cur = ggml_fused_mul_unary(ctx0, cur, tmp, GGML_UNARY_OP_SILU); cb(cur, "ffn_gate_par", il); cur = llm_build_norm(ctx0, cur, hparams,