mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-25 15:44:10 +00:00
iq4_kt: minor tweaks
This commit is contained in:
@@ -423,7 +423,7 @@ static __global__ void dequantize_block_iq4_kt(const void * __restrict__ vx, dst
     const uint16_t * ql = (const uint16_t *)x[i].ql;
     uint32_t idx1 = ql[2*ib+0] + 4096;
     uint32_t idx2 = ql[2*ib+1] + 4096;
-    const float dl = scale * x[i].scales[ib/8] * 31.75f;
+    const float dl = scale * x[i].scales[ib/8] * 31.75f * 1.01f;
     uint32_t s[2];
     const half * h = (const half *)s;
     for (int j = 0; j < 4; ++j) {
@@ -181,7 +181,7 @@ static __global__ void dequantize_mul_mat_vec_iq4_kt(const void * __restrict__ v
     if (row > nrows) return;
 
     const float * dptr = (const float *)((const char *)vx + row*row_size);
-    const float d = *dptr * 31.75f * 1.015f;
+    const float d = *dptr * 31.75f * 1.01f;
     const block_iq4_kt * x = (const block_iq4_kt *)(dptr + 1);
 
     const int num_blocks_per_row = ncols / QK_K;
Reference in New Issue
Block a user