Vulkan: adding GGML_OP_MULTI_ADD implementation (#582)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
2026-03-04 11:00:00 +00:00 · 2025-07-04 08:33:43 +02:00
parent 3e024de1da
commit 235c989e39
3 changed files with 60 additions and 23 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -9870,28 +9870,28 @@ llm_expert_gating_func_type   gating_op,
        cb(cur, "ffn_moe_weighted", il);
    }

-#ifdef GGML_USE_VULKAN
-    // aggregate experts
-    ggml_tensor * moe_out = nullptr;
-    //ggml_tensor * first_expert = nullptr;
-    for (int i = 0; i < n_expert_used; ++i) {
-        ggml_tensor * cur_expert = ggml_view_2d(ctx, experts, n_embd, n_tokens,
-                experts->nb[2], i*experts->nb[1]);
-
-        if (i == 0) {
-            moe_out = cur_expert;
-        } else {
-            moe_out = ggml_add(ctx, moe_out, cur_expert);
-        }
-    }
-
-    if (n_expert_used == 1) {
-        // avoid returning a non-contiguous tensor
-        moe_out = ggml_cont(ctx, moe_out);
-    }
-
-    return moe_out;
-#else
+//#ifdef GGML_USE_VULKAN
+//    // aggregate experts
+//    ggml_tensor * moe_out = nullptr;
+//    //ggml_tensor * first_expert = nullptr;
+//    for (int i = 0; i < n_expert_used; ++i) {
+//        ggml_tensor * cur_expert = ggml_view_2d(ctx, experts, n_embd, n_tokens,
+//                experts->nb[2], i*experts->nb[1]);
+//
+//        if (i == 0) {
+//            moe_out = cur_expert;
+//        } else {
+//            moe_out = ggml_add(ctx, moe_out, cur_expert);
+//        }
+//    }
+//
+//    if (n_expert_used == 1) {
+//        // avoid returning a non-contiguous tensor
+//        moe_out = ggml_cont(ctx, moe_out);
+//    }
+//
+//    return moe_out;
+//#else
    if (n_expert_used == 1) {
        return ggml_cont(ctx, ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], 0));
    }
@@ -9900,7 +9900,7 @@ llm_expert_gating_func_type   gating_op,
                             ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], experts->nb[1]));
    }
    return ggml_multi_add(ctx, ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], 0), n_expert_used);
-#endif
+//#endif

 }