diff --git a/ggml/src/iqk/iqk_gemm_legacy_quants.cpp b/ggml/src/iqk/iqk_gemm_legacy_quants.cpp index eaf27c32..32ce78f2 100644 --- a/ggml/src/iqk/iqk_gemm_legacy_quants.cpp +++ b/ggml/src/iqk/iqk_gemm_legacy_quants.cpp @@ -1837,7 +1837,7 @@ bool iqk_convert_legacy_quants_q8_r8(int type, int n, const void * vx, size_t bx case GGML_TYPE_Q4_0 : iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_Q4_1 : iqk_convert_qX_1_q8_1_r8(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_Q5_0 : iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; - //case GGML_TYPE_Q5_1 : iqk_convert_qX_1_q8_1_r8(n, vx, bx, vy, nrc_x); break; + case GGML_TYPE_Q5_1 : iqk_convert_qX_1_q8_1_r8>(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_Q6_0 : iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_IQ4_NL: iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_Q8_0 : iqk_convert_q80_q80_r8(n, vx, bx, vy, nrc_x); break; diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp index a8700a54..cb5af73f 100644 --- a/ggml/src/iqk/iqk_mul_mat.cpp +++ b/ggml/src/iqk/iqk_mul_mat.cpp @@ -261,6 +261,7 @@ struct MulMat { case GGML_TYPE_Q4_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_Q4_1 : return nrc_y >= 32 ? GGML_TYPE_Q8_1 : type; case GGML_TYPE_Q5_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; + case GGML_TYPE_Q5_1 : return nrc_y >= 32 ? GGML_TYPE_Q8_1 : type; case GGML_TYPE_Q6_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_IQ4_NL : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_Q8_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; @@ -412,7 +413,7 @@ bool iqk_convert_repack(int typeA, int n, const void * vx, size_t bx, void * vy, case GGML_TYPE_Q4_0: case GGML_TYPE_Q4_1: case GGML_TYPE_Q5_0: - //case GGML_TYPE_Q5_1: + case GGML_TYPE_Q5_1: case GGML_TYPE_Q6_0: case GGML_TYPE_Q8_0: case GGML_TYPE_IQ4_NL: