Fused mul + multi_add op (#858)

* Adding fused mul+multi_add + CPU implementation

* fused mul+multi_add: CUDA

* fused mul+multi_add: command line argument to disable it

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2025-10-24 07:40:35 +03:00
committed by GitHub
parent 483cea527d
commit db3ba4999f
15 changed files with 211 additions and 38 deletions

View File

@@ -619,6 +619,7 @@ extern "C" {
GGML_OP_OUT_PROD,
GGML_OP_FUSED_UP_GATE,
GGML_OP_MOE_FUSED_UP_GATE,
GGML_OP_MUL_MULTI_ADD,
GGML_OP_SCALE,
GGML_OP_SET,
@@ -1083,6 +1084,11 @@ extern "C" {
struct ggml_tensor * a,
int n_experts);
// Fused mul + multi_add: takes a context and two input tensors (a, b) and
// returns a tensor pointer. Presumably the graph-building entry point for
// GGML_OP_MUL_MULTI_ADD -- TODO confirm against the implementation.
// NOTE(review): the exact roles of a and b (which operand is the multiplier,
// and over which dimension the multi_add sum runs) are not visible in this
// header; verify before relying on a particular argument order.
GGML_API struct ggml_tensor * ggml_mul_multi_add(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b);
// dst = a
// view(dst, nb1, nb2, nb3, offset) += b
// return dst