cpu: turn off the openai topk fusing for now

Something is not right with the fused path and I have not found the bug yet. On the CPU the fusion gains little, if anything, so disabling it is not a big loss.
2026-03-04 19:10:03 +00:00 · 2025-10-19 13:11:35 +03:00
parent b79aad9d07
commit 0fb9d4963f
2 changed files with 5 additions and 7 deletions
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -22568,7 +22568,7 @@ static int ggml_compute_forward(struct ggml_compute_params * params, struct ggml
            } break;
        case GGML_OP_ARGSORT:
            {
-                if (i + 5 < cgraph->n_nodes &&
+                if (false && i + 5 < cgraph->n_nodes &&
                    cgraph->nodes[i+1]->op == GGML_OP_VIEW &&
                    cgraph->nodes[i+2]->op == GGML_OP_GET_ROWS &&
                    cgraph->nodes[i+3]->op == GGML_OP_RESHAPE &&
--- a/ggml/src/iqk/iqk_cpu_ops.cpp
+++ b/ggml/src/iqk/iqk_cpu_ops.cpp
@@ -209,15 +209,15 @@ void iqk_argsort(ggml_tensor * dst, int ith, int nth) {
        for (int j = 0; j < ne00; ++j) aux[j] = {data[j], j};
        if (nk < ne00) {
            if (order == GGML_SORT_ORDER_DESC) {
-                std::partial_sort(aux.begin(), aux.begin() + nk, aux.end(), std::greater<std::pair<float,int>>{});
+                std::partial_sort(aux.begin(), aux.begin() + nk, aux.begin() + ne00, std::greater<std::pair<float,int>>{});
            } else {
-                std::partial_sort(aux.begin(), aux.begin() + nk, aux.end());
+                std::partial_sort(aux.begin(), aux.begin() + nk, aux.begin() + ne00);
            }
        } else {
            if (order == GGML_SORT_ORDER_DESC) {
-                std::sort(aux.begin(), aux.end(), std::greater<std::pair<float,int>>{});
+                std::sort(aux.begin(), aux.begin() + ne00, std::greater<std::pair<float,int>>{});
            } else {
-                std::sort(aux.begin(), aux.end());
+                std::sort(aux.begin(), aux.begin() + ne00);
            }
        }
        auto y = (int32_t *)((char *)dst->data + ir*dst->nb[1]);
@@ -361,8 +361,6 @@ void iqk_openai_experts(struct ggml_tensor * topk, struct ggml_tensor * softmax,
    GGML_ASSERT(ggml_is_contiguous(probs));
    GGML_ASSERT(ggml_is_contiguous(softmax));
    GGML_ASSERT(ne0 <= ne00);
-    //if (ith == 0) printf("%s: ne00 = %d, ne0 = %d, topk: %s, softmax: %s\n", __func__, ne00, ne0, ggml_type_name(topk->type), ggml_type_name(softmax->type));
-    //if (ith == 0) printf("%s: ne00 = %d, ne0 = %d, topk: %s, %ld x %ld x %ld x %ld, %zu x %zu x %zu x %zu\n", __func__, ne00, ne0, ggml_type_name(topk->type), topk->ne[0], topk->ne[1], topk->ne[2], topk->ne[3], topk->nb[0], topk->nb[1], topk->nb[2], topk->nb[3]);

    size_t work_size = ne00;
    auto& aux = get_work_buffer(work_size);