mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-30 19:31:48 +00:00
iq4_nl: faster quantization (#76)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -14347,15 +14347,21 @@ size_t quantize_iq1_m(const float * restrict src, void * restrict dst, int64_t n
|
|||||||
|
|
||||||
// ============================ 4-bit non-linear quants
|
// ============================ 4-bit non-linear quants
|
||||||
|
|
||||||
static inline int best_index_int8(int n, const int8_t * val, float x) {
|
static const int8_t iq4nl_index[241] = {
|
||||||
if (x <= val[0]) return 0;
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 16, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
if (x >= val[n-1]) return n-1;
|
1, 17, 17, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 18, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||||
int ml = 0, mu = n-1;
|
3, 3, 3, 3, 3, 3, 19, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 20, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||||
while (mu-ml > 1) {
|
5, 5, 21, 21, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 22, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 23, 23, 8, 8, 8, 8,
|
||||||
int mav = (ml+mu)/2;
|
8, 8, 8, 8, 8, 8, 24, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 25, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 26, 26,
|
||||||
if (x < val[mav]) mu = mav; else ml = mav;
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 27, 27, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 28, 13, 13, 13,
|
||||||
}
|
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 29, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||||
return x - val[mu-1] < val[mu] - x ? mu-1 : mu;
|
14, 14, 14, 14, 30, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15
|
||||||
|
};
|
||||||
|
static inline int best_index_iq4nl(const int8_t * values, float x) {
|
||||||
|
int ix = (int)x - values[0];
|
||||||
|
if (ix < 0 || ix >= 241) return ix < 0 ? 0 : 15;
|
||||||
|
ix = iq4nl_index[ix];
|
||||||
|
return ix < 16 ? ix : x - values[ix-16] < values[ix-15] - x ? ix-16 : ix-15;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float * restrict x,
|
static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float * restrict x,
|
||||||
@@ -14398,7 +14404,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
|
|||||||
float sumqx = 0, sumq2 = 0;
|
float sumqx = 0, sumq2 = 0;
|
||||||
for (int j = 0; j < block_size; ++j) {
|
for (int j = 0; j < block_size; ++j) {
|
||||||
float al = id*xb[j];
|
float al = id*xb[j];
|
||||||
int l = best_index_int8(16, values, al);
|
int l = best_index_iq4nl(values, al);
|
||||||
Lb[j] = l;
|
Lb[j] = l;
|
||||||
float q = values[l];
|
float q = values[l];
|
||||||
float w = weight[j];
|
float w = weight[j];
|
||||||
@@ -14412,7 +14418,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
|
|||||||
sumqx = sumq2 = 0;
|
sumqx = sumq2 = 0;
|
||||||
for (int j = 0; j < block_size; ++j) {
|
for (int j = 0; j < block_size; ++j) {
|
||||||
float al = id*xb[j];
|
float al = id*xb[j];
|
||||||
int l = best_index_int8(16, values, al);
|
int l = best_index_iq4nl(values, al);
|
||||||
float q = values[l];
|
float q = values[l];
|
||||||
float w = weight[j];
|
float w = weight[j];
|
||||||
sumqx += w*q*xb[j];
|
sumqx += w*q*xb[j];
|
||||||
@@ -14443,7 +14449,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
|
|||||||
uint8_t * Lb = L + ib*block_size;
|
uint8_t * Lb = L + ib*block_size;
|
||||||
const float * xb = x + ib*block_size;
|
const float * xb = x + ib*block_size;
|
||||||
for (int j = 0; j < block_size; ++j) {
|
for (int j = 0; j < block_size; ++j) {
|
||||||
Lb[j] = best_index_int8(16, values, idl*xb[j]);
|
Lb[j] = best_index_iq4nl(values, idl*xb[j]);
|
||||||
}
|
}
|
||||||
l += 32;
|
l += 32;
|
||||||
uint8_t l_l = l & 0xf;
|
uint8_t l_l = l & 0xf;
|
||||||
@@ -14457,7 +14463,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
|
|||||||
if (ntry > 0) {
|
if (ntry > 0) {
|
||||||
float id = scales[0] ? 1/scales[0] : 0;
|
float id = scales[0] ? 1/scales[0] : 0;
|
||||||
for (int j = 0; j < super_block_size; ++j) {
|
for (int j = 0; j < super_block_size; ++j) {
|
||||||
L[j] = best_index_int8(16, values, id*x[j]);
|
L[j] = best_index_iq4nl(values, id*x[j]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user