Fused unary(x)*y (#70)

* Adding fused y*unary(x) op

* Fused y*unary(x) op: CUDA

* Fused y*unary(x) op: dedicated CPU implementation for silu and gelu

* Fused y*unary(x) op: Metal

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2024-10-02 17:05:56 +03:00
committed by GitHub
parent 104e7e26c4
commit 4390096212
8 changed files with 363 additions and 2 deletions

View File

@@ -487,6 +487,7 @@ extern "C" {
GGML_OP_RMS_NORM_BACK,
GGML_OP_GROUP_NORM,
GGML_OP_FUSED_RMS_NORM,
GGML_OP_FUSED_MUL_UNARY,
GGML_OP_MUL_MAT,
GGML_OP_MUL_MAT_ID,
@@ -963,6 +964,18 @@ extern "C" {
struct ggml_tensor * a,
struct ggml_tensor * b);
// Fused multiply-by-unary op: builds the GGML_OP_FUSED_MUL_UNARY node, described by
// the introducing commit as "unary(x)*y", i.e. a unary op applied to one operand and
// the result multiplied with the other, as a single op.
//   ctx  - ggml context passed through to the op
//   a, b - the two input tensors
//          NOTE(review): which of a/b is the unary input (x) and which is the
//          multiplier (y) is not visible from this declaration - confirm in ggml.c
//   op   - selects the unary function to apply
//          NOTE(review): the commit mentions dedicated CPU kernels for silu and gelu;
//          the full set of supported ggml_unary_op values should be confirmed
// Returns a pointer to the resulting tensor.
GGML_API struct ggml_tensor * ggml_fused_mul_unary(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
enum ggml_unary_op op);
// In-place flavour of the fused "unary(x)*y" op; takes the same arguments as
// ggml_fused_mul_unary (context, two input tensors, unary op selector) and returns
// a pointer to the resulting tensor.
// NOTE(review): the "_inplace" suffix presumably means the result reuses the storage
// of one of the operands rather than allocating a new tensor - which operand, and
// whether that holds on every backend, must be confirmed against the implementation.
GGML_API struct ggml_tensor * ggml_fused_mul_unary_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
enum ggml_unary_op op);
GGML_API struct ggml_tensor * ggml_div(
struct ggml_context * ctx,
struct ggml_tensor * a,