iq4_ks: minor PPL improvement

This commit is contained in:
Iwan Kawrakow
2024-11-24 12:01:18 +02:00
parent 70815ec5b2
commit 65ebc6f986
2 changed files with 21 additions and 1 deletion

View File

@@ -692,7 +692,7 @@ static __global__ void dequantize_block_iq4_ks(const void * __restrict__ vx, dst
int64_t ii = blockIdx.x;
int64_t row = (QK_K * ii) / n_per_row;
const char * cx = (const char *)vx + row * row_size;
float scale = *(const float *)cx;
float scale = *(const float *)cx * 1.01f;
const block_iq4_ks * x = (const block_iq4_ks *)(cx + sizeof(float));
const int64_t i = ii - (row*n_per_row)/QK_K;

View File

@@ -2583,6 +2583,26 @@ static void quantize_row_iq4_k_impl_bs128(const int super_block_size, const int
if (sumq2_m > 0 && sumqx_m*sumqx_m > best*sumq2_m) {
d = sumqx_m/sumq2_m; best = d*sumqx_m;
}
id = -max/shifted_values[0];
sumqx_p = sumq2_p = sumqx_m = sumq2_m = 0;
for (int j = 0; j < block_size; ++j) {
float w = weight[j];
float al = id*xb[j];
int l = best_index_iq4nl(values, al);
float q = values[l];
sumqx_p += w*q*xb[j];
sumq2_p += w*q*q;
l = best_index_iq4nl(values, -al);
q = values[l];
sumqx_m += w*q*xb[j];
sumq2_m += w*q*q;
}
if (sumq2_p > 0 && sumqx_p*sumqx_p > best*sumq2_p) {
d = sumqx_p/sumq2_p; best = d * sumqx_p; is_shifted = true;
}
if (sumq2_m > 0 && sumqx_m*sumqx_m > best*sumq2_m) {
d = sumqx_m/sumq2_m; best = d * sumqx_m; is_shifted = true;
}
for (int itry = -ntry; itry <= ntry; ++itry) {
id = (itry + values[0])/max;
sumqx_p = sumq2_p = 0;