mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-25 15:44:10 +00:00
iq2k: quantization improvement
I was not using the correct scale sign to compute the MSE when checking the solution with the sign flipped. iq4_kss is now almost on par with the 4-bit Trellis.
This commit is contained in:
@@ -715,7 +715,7 @@ static __global__ void dequantize_block_iq4_kss(const void * __restrict__ vx, ds
|
||||
int64_t ii = blockIdx.x;
|
||||
int64_t row = (QK_K * ii) / n_per_row;
|
||||
const char * cx = (const char *)vx + row * row_size;
|
||||
float scale = *(const float *)cx;
|
||||
float scale = *(const float *)cx * 1.01f;
|
||||
const block_iq4_kss * x = (const block_iq4_kss *)(cx + sizeof(float));
|
||||
const int64_t i = ii - (row*n_per_row)/QK_K;
|
||||
|
||||
|
||||
@@ -2898,7 +2898,7 @@ static void quantize_row_iq4_kss_impl(int n_per_row, const float * x, char * cy,
|
||||
vm |= (best_index_iq4nl(values, -al) << 4*j);
|
||||
}
|
||||
vp = prune_iq4ks(vp, values, xv, wv, this_d);
|
||||
vm = prune_iq4ks(vm, values, xv, wv, this_d);
|
||||
vm = prune_iq4ks(vm, values, xv, wv, -this_d);
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
float w = wv[j];
|
||||
float q = values[(vp >> 4*j) & 0xf];
|
||||
@@ -2938,7 +2938,7 @@ static void quantize_row_iq4_kss_impl(int n_per_row, const float * x, char * cy,
|
||||
vm |= (best_index_iq4nl(shifted_values, -al) << 4*j);
|
||||
}
|
||||
vp = prune_iq4ks(vp, shifted_values, xv, wv, this_d);
|
||||
vm = prune_iq4ks(vm, shifted_values, xv, wv, this_d);
|
||||
vm = prune_iq4ks(vm, shifted_values, xv, wv, -this_d);
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
float w = wv[j];
|
||||
float q = shifted_values[(vp >> 4*j) & 0xf];
|
||||
@@ -3002,8 +3002,8 @@ static void quantize_row_iq4_kss_impl(int n_per_row, const float * x, char * cy,
|
||||
vp |= (best_index_iq4nl( values, al) << 4*j);
|
||||
vm |= (best_index_iq4nl(shifted_values, al) << 4*j);
|
||||
}
|
||||
vp = prune_iq4ks(vp, values, xv, wv, dl);
|
||||
vm = prune_iq4ks(vm, shifted_values, xv, wv, dl);
|
||||
vp = prune_iq4ks(vp, values, xv, wv, dl);
|
||||
vm = prune_iq4ks(vm, shifted_values, xv, wv, dl);
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
float w = wv[j];
|
||||
float q = values[(vp >> 4*j) & 0xf];
|
||||
|
||||
Reference in New Issue
Block a user