mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-01-26 17:20:01 +00:00
Better argsort (CPU) (#835)
* Better argsort (CPU) * Minor --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -256,8 +256,8 @@ if (GGML_BLAS)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set (GGML_SOURCES_IQK iqk/iqk_quantize.cpp)
|
||||
set (GGML_HEADERS_IQK iqk/iqk_config.h)
|
||||
set (GGML_SOURCES_IQK iqk/iqk_quantize.cpp iqk/iqk_cpu_ops.cpp)
|
||||
set (GGML_HEADERS_IQK iqk/iqk_config.h iqk/iqk_cpu_ops.h)
|
||||
if (GGML_IQK_MUL_MAT)
|
||||
message(STATUS "Using optimized iqk matrix multiplications")
|
||||
add_compile_definitions(GGML_USE_IQK_MULMAT)
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "ggml.h"
|
||||
#include "ggml-aarch64.h"
|
||||
#include "iqk/iqk_quantize.h"
|
||||
#include "iqk/iqk_cpu_ops.h"
|
||||
#if GGML_USE_IQK_MULMAT
|
||||
#include "iqk/iqk_mul_mat.h"
|
||||
#include "iqk/iqk_config.h"
|
||||
@@ -9408,6 +9409,7 @@ struct ggml_tensor * ggml_argsort(
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
|
||||
|
||||
ggml_set_op_params_i32(result, 0, (int32_t) order);
|
||||
ggml_set_op_params_i32(result, 1, (int32_t) a->ne[0]);
|
||||
|
||||
result->op = GGML_OP_ARGSORT;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
@@ -9446,6 +9448,7 @@ struct ggml_tensor * ggml_top_k(
|
||||
GGML_ASSERT(a->ne[0] >= k);
|
||||
|
||||
struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
|
||||
ggml_set_op_params_i32(result, 1, k);
|
||||
|
||||
result = ggml_view_4d(ctx, result,
|
||||
k, result->ne[1], result->ne[2], result->ne[3],
|
||||
@@ -19942,7 +19945,8 @@ static void ggml_compute_forward_argsort(
|
||||
switch (src0->type) {
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
ggml_compute_forward_argsort_f32(params, dst);
|
||||
iqk_argsort(dst, params->ith, params->nth);
|
||||
//ggml_compute_forward_argsort_f32(params, dst);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
|
||||
57
ggml/src/iqk/iqk_cpu_ops.cpp
Normal file
57
ggml/src/iqk/iqk_cpu_ops.cpp
Normal file
@@ -0,0 +1,57 @@
|
||||
//
|
||||
// Copyright (C) 2025 Iwan Kawrakow
|
||||
// MIT license
|
||||
// SPDX-License-Identifier: MIT
|
||||
//
|
||||
|
||||
#include "iqk_cpu_ops.h"
|
||||
#include "ggml.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
void iqk_grouped_top_k([[maybe_unused]] ggml_tensor * dst, [[maybe_unused]] int ith, [[maybe_unused]] int nth) {
|
||||
}
|
||||
|
||||
// Row-wise argsort of an F32 tensor on the CPU.
//
// For every row of dst->src[0] the element indices are ordered by value and
// written to the matching row of dst as int32.
//
// Parameters:
//   dst - destination tensor (must be GGML_TYPE_I32); dst->src[0] is the F32 input.
//         dst->op_params[0] holds the ggml_sort_order,
//         dst->op_params[1] holds nk, the number of leading entries that must be
//         in sorted order.
//   ith - index of the calling thread.
//   nth - total number of threads; rows are split into contiguous chunks of
//         ceil(nrows/nth) rows per thread.
//
// When 0 <= nk < ne00 only the first nk positions of each output row are
// guaranteed to be ordered (std::partial_sort); the remaining positions still
// receive valid indices, but in unspecified order. Any other nk results in a
// full sort of the row.
//
// Ties are broken by the element index, because (value, index) pairs are
// compared lexicographically.
void iqk_argsort(ggml_tensor * dst, int ith, int nth) {

    auto src = dst->src[0];
    GGML_ASSERT(dst->type == GGML_TYPE_I32);
    GGML_ASSERT(src->type == GGML_TYPE_F32);

    // Contiguous slice of rows handled by this thread.
    auto nrows = ggml_nrows(src);
    auto npt   = (nrows + nth - 1)/nth;
    auto first = npt*ith;
    auto last  = std::min(first + npt, nrows);
    if (last <= first) return;

    auto order = (ggml_sort_order)dst->op_params[0];
    int nk = dst->op_params[1];

    int ne00 = src->ne[0];

    // Per-thread scratch buffer of (value, index) pairs. It only ever grows, so
    // it can be longer than the current row when an earlier call used a larger
    // ne00.
    thread_local std::vector<std::pair<float,int>> aux;
    if ((int)aux.size() < ne00) aux.resize(ne00);

    // Sort exactly the ne00 entries filled for the current row. Sorting up to
    // aux.end() would mix stale pairs left over from a previous, wider call
    // into the result.
    const auto row_begin = aux.begin();
    const auto row_end   = aux.begin() + ne00;

    // A negative nk would produce an invalid middle iterator for
    // std::partial_sort, so anything outside [0, ne00) falls back to a full sort.
    const bool partial = nk >= 0 && nk < ne00;

    // NOTE(review): rows are addressed as ir*nb[1] with a flat row index over
    // all higher dimensions, which presumably relies on src and dst being
    // contiguous beyond dim 0 - confirm against callers.
    for (auto ir = first; ir < last; ++ir) {
        auto data = (const float *)((const char *)src->data + ir*src->nb[1]);
        for (int j = 0; j < ne00; ++j) aux[j] = {data[j], j};

        if (partial) {
            if (order == GGML_SORT_ORDER_DESC) {
                std::partial_sort(row_begin, row_begin + nk, row_end, std::greater<std::pair<float,int>>{});
            } else {
                std::partial_sort(row_begin, row_begin + nk, row_end);
            }
        } else {
            if (order == GGML_SORT_ORDER_DESC) {
                std::sort(row_begin, row_end, std::greater<std::pair<float,int>>{});
            } else {
                std::sort(row_begin, row_end);
            }
        }

        auto y = (int32_t *)((char *)dst->data + ir*dst->nb[1]);
        for (int j = 0; j < ne00; ++j) y[j] = aux[j].second;
    }

}
|
||||
|
||||
25
ggml/src/iqk/iqk_cpu_ops.h
Normal file
25
ggml/src/iqk/iqk_cpu_ops.h
Normal file
@@ -0,0 +1,25 @@
|
||||
//
|
||||
// Copyright (C) 2025 Iwan Kawrakow
|
||||
// MIT license
|
||||
// SPDX-License-Identifier: MIT
|
||||
//
|
||||
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include "iqk_config.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct ggml_tensor;
|
||||
|
||||
// Grouped top-k CPU op. The implementation in iqk_cpu_ops.cpp is currently an
// empty placeholder that ignores all of its arguments.
void iqk_grouped_top_k(struct ggml_tensor * dst, int ith, int nth);
|
||||
|
||||
// Row-wise argsort: orders the element indices of each row of dst->src[0]
// (F32) and stores them in dst (I32). ith is the calling thread's index and
// nth the number of threads; each thread processes its own chunk of rows.
void iqk_argsort(struct ggml_tensor * dst, int ith, int nth);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
Reference in New Issue
Block a user