mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-28 18:32:04 +00:00
POC: per row scale
This is a POC how to work around opinionated ggml to have scales per row rather than per block. Only implemened for Zen4 and only for iq2_tn.
This commit is contained in:
@@ -2517,6 +2517,7 @@ extern "C" {
|
||||
int64_t ncols; // number of columns to process simultaneously
|
||||
ggml_gemv_t gemv;
|
||||
ggml_gemm_t gemm;
|
||||
int64_t row_meta_size;
|
||||
} ggml_type_traits_t;
|
||||
|
||||
GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
|
||||
|
||||
Reference in New Issue
Block a user