From 4cdf9b333feeaf7a4469ba8ca74c260ce38c6c1e Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Wed, 2 Oct 2024 10:34:24 +0300 Subject: [PATCH] Adding q6_0: CUDA mmvq works --- ggml/src/ggml-cuda/vecdotq.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-cuda/vecdotq.cuh b/ggml/src/ggml-cuda/vecdotq.cuh index 84caa575..7baabb7a 100644 --- a/ggml/src/ggml-cuda/vecdotq.cuh +++ b/ggml/src/ggml-cuda/vecdotq.cuh @@ -58,8 +58,8 @@ template static __device__ __forceinline__ float vec_dot_q6_0_q8_1_imp #pragma unroll for (int i = 0; i < vdr; ++i) { - const int vi0 = ((vl[i] >> 0) & 0x0F0F0F0F) | ((vh[i/2] << 4) & 0x30303030); - const int vi1 = ((vl[i] >> 4) & 0x0F0F0F0F) | ((vh[i/2] << 2) & 0x30303030); + const int vi0 = ((vl[i] >> 0) & 0x0F0F0F0F) | ((vh[i] << 4) & 0x30303030); + const int vi1 = ((vl[i] >> 4) & 0x0F0F0F0F) | ((vh[i] << 2) & 0x30303030); // SIMD dot product of quantized values sumi = ggml_cuda_dp4a(vi0, u[2*i+0], sumi);