mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-05 19:40:19 +00:00
iq3_k, iq5_k: faster quantization
Just use the same trick as iq4_k
This commit is contained in:
@@ -632,14 +632,17 @@ void vec_dot_iq2_k_q8_k(int n, float * GGML_RESTRICT s, size_t bs, const void *
|
||||
// ============================================== iq3_k
|
||||
//
|
||||
namespace {
|
||||
static int8_t iq3nl_index[69] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5
|
||||
const int8_t iq3nl_index[111] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9,
|
||||
9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 11, 11, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 12, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 13, 13, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 14, 14, 7, 7, 7, 7, 7, 7, 7, 7, 7
|
||||
};
|
||||
static inline int best_index_iq3nl(const int8_t * values, float x) {
|
||||
int index = x < values[1] ? 0 : x >= values[6] ? 6 : iq3nl_index[(int)x - values[1]];
|
||||
return x - values[index] < values[index+1] - x ? index : index+1;
|
||||
inline int best_index_iq3nl(const int8_t * values, float x) {
|
||||
int ix = (int)x - values[0];
|
||||
if (ix < 0 || ix >= 111) return ix < 0 ? 0 : 7;
|
||||
ix = iq3nl_index[ix];
|
||||
return ix < 8 ? ix : x - values[ix-8] < values[ix-7] - x ? ix-8 : ix-7;
|
||||
}
|
||||
|
||||
static void quantize_row_iq3_k_impl(const float * x, void * vy, int n_per_row, const float * quant_weights) {
|
||||
@@ -1290,21 +1293,21 @@ void vec_dot_iq5_k_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx,
|
||||
}
|
||||
|
||||
namespace {
|
||||
static int8_t iq5nl_index[248] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10,
|
||||
11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 16, 16, 16,
|
||||
16, 16, 16, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21,
|
||||
21, 21, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29,
|
||||
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30
|
||||
const int8_t iq5nl_index[248] = {
|
||||
0, 0, 0, 0, 0, 0, 32, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 33, 33, 2, 2, 2, 2, 2, 2, 2, 2, 2, 34, 34, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 35, 35, 4, 4, 4, 4, 4, 4, 4, 36, 36, 5, 5, 5, 5, 5, 5, 5, 37, 37, 6, 6, 6, 6, 6, 6,
|
||||
6, 38, 7, 7, 7, 7, 7, 7, 39, 39, 8, 8, 8, 8, 8, 40, 40, 9, 9, 9, 9, 9, 41, 41, 10, 10, 10, 10, 10, 42, 11, 11,
|
||||
11, 11, 11, 43, 12, 12, 12, 12, 12, 44, 13, 13, 13, 13, 13, 45, 14, 14, 14, 14, 14, 46, 15, 15, 15, 15, 47, 47, 16, 16, 16, 16,
|
||||
48, 17, 17, 17, 17, 17, 49, 18, 18, 18, 18, 18, 50, 19, 19, 19, 19, 19, 51, 20, 20, 20, 20, 20, 52, 21, 21, 21, 21, 21, 53, 53,
|
||||
22, 22, 22, 22, 22, 54, 54, 23, 23, 23, 23, 23, 23, 55, 24, 24, 24, 24, 24, 24, 24, 56, 25, 25, 25, 25, 25, 25, 25, 57, 57, 26,
|
||||
26, 26, 26, 26, 26, 26, 58, 58, 27, 27, 27, 27, 27, 27, 27, 27, 59, 28, 28, 28, 28, 28, 28, 28, 28, 28, 60, 29, 29, 29, 29, 29,
|
||||
29, 29, 29, 29, 29, 61, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 62, 31, 31, 31, 31, 31, 31
|
||||
};
|
||||
static inline int best_index_iq5nl(const int8_t * values, float x) {
|
||||
if (x <= values[ 0]) return 0;
|
||||
if (x >= values[31]) return 31;
|
||||
int index = iq5nl_index[(int)x - values[0]];
|
||||
return x - values[index] < values[index+1] - x ? index : index+1;
|
||||
inline int best_index_iq5nl(const int8_t * values, float x) {
|
||||
int ix = (int)x - values[0];
|
||||
if (ix < 0 || ix >= 247) return ix < 0 ? 0 : 31;
|
||||
ix = iq5nl_index[ix];
|
||||
return ix < 32 ? ix : x - values[ix-32] < values[ix-31] - x ? ix-32 : ix-31;
|
||||
}
|
||||
|
||||
void quantize_row_iq5_k_impl(const float * x, void * vy, int n_per_row, const float * quant_weights) {
|
||||
|
||||
Reference in New Issue
Block a user