mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-28 18:32:04 +00:00
SER - Smart Expert Reduction (#239)
* A better way to measure the cost of ggml_barrier * Smart expert selection * Add ser option to llama-bench --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -597,6 +597,7 @@ extern "C" {
|
||||
GGML_OP_ARANGE,
|
||||
GGML_OP_TIMESTEP_EMBEDDING,
|
||||
GGML_OP_ARGSORT,
|
||||
GGML_OP_ARGSORT_THRESH,
|
||||
GGML_OP_LEAKY_RELU,
|
||||
GGML_OP_SOFTCAP,
|
||||
GGML_OP_SOFT_CAP_MAX,
|
||||
@@ -1913,6 +1914,12 @@ extern "C" {
|
||||
struct ggml_tensor * a,
|
||||
enum ggml_sort_order order);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_argsort_thresh(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int min_entries,
|
||||
float threshold);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_arange(
|
||||
struct ggml_context * ctx,
|
||||
float start,
|
||||
@@ -1924,6 +1931,12 @@ extern "C" {
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int k);
|
||||
GGML_API struct ggml_tensor * ggml_top_k_thresh(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int k,
|
||||
int min_entries,
|
||||
float thresh);
|
||||
|
||||
#define GGML_KQ_MASK_PAD 32
|
||||
|
||||
|
||||
Reference in New Issue
Block a user