cpu: turn off the openai topk fusing for now

Something is not right with the fused path, and I have not found the bug yet.
On the CPU the fusion gains little, if anything, so disabling it is not a big loss.
This commit is contained in:
Iwan Kawrakow
2025-10-19 13:11:35 +03:00
parent b79aad9d07
commit 0fb9d4963f
2 changed files with 5 additions and 7 deletions

View File

@@ -22568,7 +22568,7 @@ static int ggml_compute_forward(struct ggml_compute_params * params, struct ggml
} break;
case GGML_OP_ARGSORT:
{
if (i + 5 < cgraph->n_nodes &&
if (false && i + 5 < cgraph->n_nodes &&
cgraph->nodes[i+1]->op == GGML_OP_VIEW &&
cgraph->nodes[i+2]->op == GGML_OP_GET_ROWS &&
cgraph->nodes[i+3]->op == GGML_OP_RESHAPE &&

View File

@@ -209,15 +209,15 @@ void iqk_argsort(ggml_tensor * dst, int ith, int nth) {
for (int j = 0; j < ne00; ++j) aux[j] = {data[j], j};
if (nk < ne00) {
if (order == GGML_SORT_ORDER_DESC) {
std::partial_sort(aux.begin(), aux.begin() + nk, aux.end(), std::greater<std::pair<float,int>>{});
std::partial_sort(aux.begin(), aux.begin() + nk, aux.begin() + ne00, std::greater<std::pair<float,int>>{});
} else {
std::partial_sort(aux.begin(), aux.begin() + nk, aux.end());
std::partial_sort(aux.begin(), aux.begin() + nk, aux.begin() + ne00);
}
} else {
if (order == GGML_SORT_ORDER_DESC) {
std::sort(aux.begin(), aux.end(), std::greater<std::pair<float,int>>{});
std::sort(aux.begin(), aux.begin() + ne00, std::greater<std::pair<float,int>>{});
} else {
std::sort(aux.begin(), aux.end());
std::sort(aux.begin(), aux.begin() + ne00);
}
}
auto y = (int32_t *)((char *)dst->data + ir*dst->nb[1]);
@@ -361,8 +361,6 @@ void iqk_openai_experts(struct ggml_tensor * topk, struct ggml_tensor * softmax,
GGML_ASSERT(ggml_is_contiguous(probs));
GGML_ASSERT(ggml_is_contiguous(softmax));
GGML_ASSERT(ne0 <= ne00);
//if (ith == 0) printf("%s: ne00 = %d, ne0 = %d, topk: %s, softmax: %s\n", __func__, ne00, ne0, ggml_type_name(topk->type), ggml_type_name(softmax->type));
//if (ith == 0) printf("%s: ne00 = %d, ne0 = %d, topk: %s, %ld x %ld x %ld x %ld, %zu x %zu x %zu x %zu\n", __func__, ne00, ne0, ggml_type_name(topk->type), topk->ne[0], topk->ne[1], topk->ne[2], topk->ne[3], topk->nb[0], topk->nb[1], topk->nb[2], topk->nb[3]);
size_t work_size = ne00;
auto& aux = get_work_buffer(work_size);