From de7fe92833c394e5fa54b89634cc9dce34c54349 Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow
Date: Mon, 11 Nov 2024 19:59:15 +0200
Subject: [PATCH] iq4_kt: minor tweaks

---
 ggml/src/ggml-cuda/convert.cu | 2 +-
 ggml/src/ggml-cuda/dmmv.cu | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ggml/src/ggml-cuda/convert.cu b/ggml/src/ggml-cuda/convert.cu
index 2ebfe573..4ffe5a98 100644
--- a/ggml/src/ggml-cuda/convert.cu
+++ b/ggml/src/ggml-cuda/convert.cu
@@ -423,7 +423,7 @@ static __global__ void dequantize_block_iq4_kt(const void * __restrict__ vx, dst
     const uint16_t * ql = (const uint16_t *)x[i].ql;
     uint32_t idx1 = ql[2*ib+0] + 4096;
     uint32_t idx2 = ql[2*ib+1] + 4096;
-    const float dl = scale * x[i].scales[ib/8] * 31.75f;
+    const float dl = scale * x[i].scales[ib/8] * 31.75f * 1.01f;
     uint32_t s[2];
     const half * h = (const half *)s;
     for (int j = 0; j < 4; ++j) {
diff --git a/ggml/src/ggml-cuda/dmmv.cu b/ggml/src/ggml-cuda/dmmv.cu
index d20317b2..6236ec63 100644
--- a/ggml/src/ggml-cuda/dmmv.cu
+++ b/ggml/src/ggml-cuda/dmmv.cu
@@ -181,7 +181,7 @@ static __global__ void dequantize_mul_mat_vec_iq4_kt(const void * __restrict__ v
     if (row > nrows) return;
 
     const float * dptr = (const float *)((const char *)vx + row*row_size);
-    const float d = *dptr * 31.75f * 1.015f;
+    const float d = *dptr * 31.75f * 1.01f;
     const block_iq4_kt * x = (const block_iq4_kt *)(dptr + 1);
 
     const int num_blocks_per_row = ncols / QK_K;