From f2ced256b4b47f22e7374cd0b1cd311efc49c271 Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Fri, 7 Jun 2024 15:21:16 +0300
Subject: [PATCH] iqk_mul_mat: fp16 tweaks

Use 4x3 tiling on a real AVX2 CPU (with only 16 vector registers).
This works best for the Ryzen-5975WX.
---
 iqk_mul_mat.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/iqk_mul_mat.cpp b/iqk_mul_mat.cpp
index 905a7f9b..72147615 100644
--- a/iqk_mul_mat.cpp
+++ b/iqk_mul_mat.cpp
@@ -2434,7 +2434,9 @@ bool MulMat::set_mul_mat(int typeA, int ne00, MulMat& mm, int& row_size_q8, int
         mm.funcs[0] = mul_mat_f16_f32_T<1>;
         mm.funcs[1] = mul_mat_f16_f32_T<2>;
         mm.funcs[2] = mul_mat_f16_f32_T<3>;
+#ifdef __AVX512F__
         mm.funcs[3] = mul_mat_f16_f32_T<4>;
+#endif
         row_size_q8 = ggml_row_size(GGML_TYPE_F32, ne00);
         return true;
     }