mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-04 19:10:03 +00:00
cpu: turn off the openai topk fusing for now
Something is not right with the fused path and I have not found the bug yet. On the CPU the fusion gains little, if anything, so turning it off is not a big loss.
This commit is contained in:
@@ -22568,7 +22568,7 @@ static int ggml_compute_forward(struct ggml_compute_params * params, struct ggml
|
||||
} break;
|
||||
case GGML_OP_ARGSORT:
|
||||
{
|
||||
if (i + 5 < cgraph->n_nodes &&
|
||||
if (false && i + 5 < cgraph->n_nodes &&
|
||||
cgraph->nodes[i+1]->op == GGML_OP_VIEW &&
|
||||
cgraph->nodes[i+2]->op == GGML_OP_GET_ROWS &&
|
||||
cgraph->nodes[i+3]->op == GGML_OP_RESHAPE &&
|
||||
|
||||
@@ -209,15 +209,15 @@ void iqk_argsort(ggml_tensor * dst, int ith, int nth) {
|
||||
for (int j = 0; j < ne00; ++j) aux[j] = {data[j], j};
|
||||
if (nk < ne00) {
|
||||
if (order == GGML_SORT_ORDER_DESC) {
|
||||
std::partial_sort(aux.begin(), aux.begin() + nk, aux.end(), std::greater<std::pair<float,int>>{});
|
||||
std::partial_sort(aux.begin(), aux.begin() + nk, aux.begin() + ne00, std::greater<std::pair<float,int>>{});
|
||||
} else {
|
||||
std::partial_sort(aux.begin(), aux.begin() + nk, aux.end());
|
||||
std::partial_sort(aux.begin(), aux.begin() + nk, aux.begin() + ne00);
|
||||
}
|
||||
} else {
|
||||
if (order == GGML_SORT_ORDER_DESC) {
|
||||
std::sort(aux.begin(), aux.end(), std::greater<std::pair<float,int>>{});
|
||||
std::sort(aux.begin(), aux.begin() + ne00, std::greater<std::pair<float,int>>{});
|
||||
} else {
|
||||
std::sort(aux.begin(), aux.end());
|
||||
std::sort(aux.begin(), aux.begin() + ne00);
|
||||
}
|
||||
}
|
||||
auto y = (int32_t *)((char *)dst->data + ir*dst->nb[1]);
|
||||
@@ -361,8 +361,6 @@ void iqk_openai_experts(struct ggml_tensor * topk, struct ggml_tensor * softmax,
|
||||
GGML_ASSERT(ggml_is_contiguous(probs));
|
||||
GGML_ASSERT(ggml_is_contiguous(softmax));
|
||||
GGML_ASSERT(ne0 <= ne00);
|
||||
//if (ith == 0) printf("%s: ne00 = %d, ne0 = %d, topk: %s, softmax: %s\n", __func__, ne00, ne0, ggml_type_name(topk->type), ggml_type_name(softmax->type));
|
||||
//if (ith == 0) printf("%s: ne00 = %d, ne0 = %d, topk: %s, %ld x %ld x %ld x %ld, %zu x %zu x %zu x %zu\n", __func__, ne00, ne0, ggml_type_name(topk->type), topk->ne[0], topk->ne[1], topk->ne[2], topk->ne[3], topk->nb[0], topk->nb[1], topk->nb[2], topk->nb[3]);
|
||||
|
||||
size_t work_size = ne00;
|
||||
auto& aux = get_work_buffer(work_size);
|
||||
|
||||
Reference in New Issue
Block a user