From aaa164773def027ed7cbc2c662c40f7deb4fb557 Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Sat, 21 Jun 2025 16:00:22 +0200 Subject: [PATCH] q5_1 64.2 t/s -> 114.9 t/s. There is no repacked variant. --- ggml/src/iqk/iqk_gemm_legacy_quants.cpp | 18 +++++++++++++++++- ggml/src/iqk/iqk_mul_mat.cpp | 1 + 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/ggml/src/iqk/iqk_gemm_legacy_quants.cpp b/ggml/src/iqk/iqk_gemm_legacy_quants.cpp index 3b598718..ab6eb130 100644 --- a/ggml/src/iqk/iqk_gemm_legacy_quants.cpp +++ b/ggml/src/iqk/iqk_gemm_legacy_quants.cpp @@ -2893,6 +2893,22 @@ struct DeqQ50 { const uint8x16_t mh = vdupq_n_u8(0xf0); }; +struct DeqQ51 { /* Dequantizer for block_q5_1: dequant() turns one block into a pair of 16-byte int8 vectors. */ + + inline int8x16x2_t dequant(const block_q5_1& x) const { /* x: the block to unpack; returns both vectors in an int8x16x2_t. */ + int8x16x2_t r; + bits.prepare1(x.qs, r.val); /* presumably fills r.val[0] and r.val[1] from x.qs; prepare1 is defined outside this patch - TODO confirm */ + auto qh = x.qh; + r.val[0] = vreinterpretq_s8_u8(vorrq_u8(vreinterpretq_u8_s8(r.val[0]), vandq_u8(mh, hbits.to_bytes(qh+0)))); /* OR into the first vector the result of hbits.to_bytes(qh+0), masked by mh so that only bit 0x10 of each byte can be set */ + r.val[1] = vreinterpretq_s8_u8(vorrq_u8(vreinterpretq_u8_s8(r.val[1]), vandq_u8(mh, hbits.to_bytes(qh+2)))); /* same for the second vector, using qh+2 */ + return r; + } + + Q4LegacyBits bits; /* helper providing prepare1(); defined elsewhere in the file */ + HighBit5Legacy hbits; /* helper providing to_bytes(); defined elsewhere in the file */ + const uint8x16_t mh = vdupq_n_u8(0x10); /* mask with 0x10 (bit 4) in every byte lane; the DeqQ50 context line above uses 0xf0 instead */ +}; + struct DeqQ60 { inline int8x16x2_t dequant(const block_q6_0& x) const { @@ -2992,7 +3008,7 @@ bool iqk_convert_legacy_quants_q8_r8(int type, int n, const void * vx, size_t bx case GGML_TYPE_Q4_0 : iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_Q4_1 : iqk_convert_qX_1_q8_1_r8(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_Q5_0 : iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; - // case GGML_TYPE_Q5_1 : iqk_convert_qX_1_q8_1_r8>(n, vx, bx, vy, nrc_x); break; + case GGML_TYPE_Q5_1 : iqk_convert_qX_1_q8_1_r8(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_Q6_0 : iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_IQ4_NL: iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_Q8_0 : iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp index 
0f91c92a..d6753f0a 100644 --- a/ggml/src/iqk/iqk_mul_mat.cpp +++ b/ggml/src/iqk/iqk_mul_mat.cpp @@ -274,6 +274,7 @@ struct MulMat { case GGML_TYPE_Q4_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_Q4_1 : return nrc_y >= 32 ? GGML_TYPE_Q8_1 : type; case GGML_TYPE_Q5_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; + case GGML_TYPE_Q5_1 : return nrc_y >= 32 ? GGML_TYPE_Q8_1 : type; case GGML_TYPE_Q6_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_Q8_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_IQ4_NL : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type;