iq3_ks: Fix CUDA dot product

This commit is contained in:
Iwan Kawrakow
2024-10-09 18:09:35 +03:00
parent 252c6b2d82
commit 0317ba5a01

View File

@@ -559,9 +559,9 @@ __device__ __forceinline__ float vec_dot_iq3_ks_q8_1(
const int hshift = 4*(1-ib128);
const uint16_t * values1 = iq3k_table + ((bq3->scales[4*ib128+0] << 6) & 0x40);
-const uint16_t * values2 = iq3k_table + ((bq3->scales[4*ib128+0] << 6) & 0x40);
-const uint16_t * values3 = iq3k_table + ((bq3->scales[4*ib128+0] << 6) & 0x40);
-const uint16_t * values4 = iq3k_table + ((bq3->scales[4*ib128+0] << 6) & 0x40);
+const uint16_t * values2 = iq3k_table + ((bq3->scales[4*ib128+1] << 6) & 0x40);
+const uint16_t * values3 = iq3k_table + ((bq3->scales[4*ib128+2] << 6) & 0x40);
+const uint16_t * values4 = iq3k_table + ((bq3->scales[4*ib128+3] << 6) & 0x40);
const int * q8;
int sumi[4] = {0, 0, 0, 0};