diff --git a/ggml/src/ggml-cuda/convert.cu b/ggml/src/ggml-cuda/convert.cu index 84120ac7..8938fce7 100644 --- a/ggml/src/ggml-cuda/convert.cu +++ b/ggml/src/ggml-cuda/convert.cu @@ -692,7 +692,7 @@ static __global__ void dequantize_block_iq4_ks(const void * __restrict__ vx, dst int64_t ii = blockIdx.x; int64_t row = (QK_K * ii) / n_per_row; const char * cx = (const char *)vx + row * row_size; - float scale = *(const float *)cx; + float scale = *(const float *)cx * 1.01f; const block_iq4_ks * x = (const block_iq4_ks *)(cx + sizeof(float)); const int64_t i = ii - (row*n_per_row)/QK_K; diff --git a/ggml/src/iqk/iqk_quantize.cpp b/ggml/src/iqk/iqk_quantize.cpp index 0891c4f2..10713b05 100644 --- a/ggml/src/iqk/iqk_quantize.cpp +++ b/ggml/src/iqk/iqk_quantize.cpp @@ -2583,6 +2583,26 @@ static void quantize_row_iq4_k_impl_bs128(const int super_block_size, const int if (sumq2_m > 0 && sumqx_m*sumqx_m > best*sumq2_m) { d = sumqx_m/sumq2_m; best = d*sumqx_m; } + id = -max/shifted_values[0]; + sumqx_p = sumq2_p = sumqx_m = sumq2_m = 0; + for (int j = 0; j < block_size; ++j) { + float w = weight[j]; + float al = id*xb[j]; + int l = best_index_iq4nl(values, al); + float q = values[l]; + sumqx_p += w*q*xb[j]; + sumq2_p += w*q*q; + l = best_index_iq4nl(values, -al); + q = values[l]; + sumqx_m += w*q*xb[j]; + sumq2_m += w*q*q; + } + if (sumq2_p > 0 && sumqx_p*sumqx_p > best*sumq2_p) { + d = sumqx_p/sumq2_p; best = d * sumqx_p; is_shifted = true; + } + if (sumq2_m > 0 && sumqx_m*sumqx_m > best*sumq2_m) { + d = sumqx_m/sumq2_m; best = d * sumqx_m; is_shifted = true; + } for (int itry = -ntry; itry <= ntry; ++itry) { id = (itry + values[0])/max; sumqx_p = sumq2_p = 0;