From e08e292beaa25ed07d5382797072ecdf646b2edd Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Tue, 18 Feb 2025 16:39:45 +0200 Subject: [PATCH] q8_KV_r8: don't use nrc_y = 16 on Zen4 Disabling the nrc_y = 16 kernel is faster (350 t/s), although it is not clear why. This is much better than the 290 t/s we had before, but still slower than the 370 t/s for q8_k_r8. --- ggml/src/iqk/iqk_mul_mat.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp index e040cf91..e5c0bbf6 100644 --- a/ggml/src/iqk/iqk_mul_mat.cpp +++ b/ggml/src/iqk/iqk_mul_mat.cpp @@ -9367,9 +9367,9 @@ bool MulMat::prepare(int typeA, int typeB, int ne00, MulMat& mm, int Ny) { mm.funcs[5] = mul_mat_q8_KV_r8_q8_KV<6>; mm.funcs[6] = mul_mat_q8_KV_r8_q8_KV<7>; mm.funcs[7] = mul_mat_q8_KV_r8_q8_KV<8>; -#ifdef HAVE_FANCY_SIMD - mm.func16 = mul_mat_q8_KV_r8_q8_KV<16>; -#endif +//#ifdef HAVE_FANCY_SIMD +// mm.func16 = mul_mat_q8_KV_r8_q8_KV<16>; +//#endif expected_typeB = GGML_TYPE_Q8_KV; break; case GGML_TYPE_IQ4_K_R4: