Fused y*unary(x) op (#70)

* Add fused y*unary(x) op

* Fused y*unary(x) op: CUDA

* Fused y*unary(x) op: dedicated CPU implementation for silu and gelu

* Fused y*unary(x) op: Metal

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2024-10-02 17:05:56 +03:00
committed by GitHub
parent 104e7e26c4
commit 4390096212
8 changed files with 363 additions and 2 deletions

View File

@@ -8083,6 +8083,13 @@ static struct ggml_tensor * llm_build_ffn(
cur = tmp;
}
if (type_gate == LLM_FFN_PAR &&
(type_op == LLM_FFN_SILU || type_op == LLM_FFN_RELU || (type_op == LLM_FFN_GELU && !act_scales))) {
cur = ggml_fused_mul_unary(ctx, cur, tmp, type_op == LLM_FFN_SILU ? GGML_UNARY_OP_SILU :
type_op == LLM_FFN_RELU ? GGML_UNARY_OP_RELU : GGML_UNARY_OP_GELU);
}
else {
switch (type_op) {
case LLM_FFN_SILU:
{
@@ -8122,6 +8129,7 @@ static struct ggml_tensor * llm_build_ffn(
cur = ggml_mul(ctx, cur, tmp);
cb(cur, "ffn_gate_par", il);
}
}
if (down) {
cur = llm_build_lora_mm(lctx, ctx, down, cur);