diff --git a/ggml/src/iqk/iqk_gemm_legacy_quants.cpp b/ggml/src/iqk/iqk_gemm_legacy_quants.cpp index 4465eda2..162b55e6 100644 --- a/ggml/src/iqk/iqk_gemm_legacy_quants.cpp +++ b/ggml/src/iqk/iqk_gemm_legacy_quants.cpp @@ -2783,14 +2783,24 @@ void mul_mat_q8_0_r8_q8_0(int n, const void * vx, size_t bx, const DataInfo& inf } struct DeqQ40 { - const int8x16_t m8 = vdupq_n_s8(-8); - const int8x16_t ml = vdupq_n_s8(0xf); + const int8x16_t m8 = vdupq_n_s8(-8); + const uint8x16_t ml = vdupq_n_s8(0xf); inline int8x16x2_t dequant(const block_q4_0& x) const { auto bits = vld1q_u8(x.qs); return { vaddq_s8(vreinterpretq_s8_u8(vandq_u8(bits, ml)), m8), vaddq_s8(vreinterpretq_s8_u8(vshrq_n_u8(bits, 4)), m8) }; } }; +struct DeqIQ4NL { + const int8x16_t mt = load_values(); + const uint8x16_t ml = vdupq_n_s8(0xf); + inline int8x16x2_t dequant(const block_iq4_nl& x) const { + auto bits = vld1q_u8(x.qs); + return { vqtbl1q_s8(mt, vandq_u8(bits, ml)), vqtbl1q_s8(mt, vshrq_n_u8(bits, 4)) }; + } + static inline int8x16_t load_values() { return vld1q_s8(iq4k_values); } +}; + struct DeqQ50 { inline int8x16x2_t dequant(const block_q5_0& x) const { @@ -2873,7 +2883,7 @@ bool iqk_convert_legacy_quants_q8_r8(int type, int n, const void * vx, size_t bx case GGML_TYPE_Q5_0 : iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; // case GGML_TYPE_Q5_1 : iqk_convert_qX_1_q8_1_r8>(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_Q6_0 : iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; - // case GGML_TYPE_IQ4_NL: iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; + case GGML_TYPE_IQ4_NL: iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_Q8_0 : iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; default: return false; } diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp index ed4b6efe..772cb7dc 100644 --- a/ggml/src/iqk/iqk_mul_mat.cpp +++ b/ggml/src/iqk/iqk_mul_mat.cpp @@ -275,6 +275,7 @@ struct MulMat { case GGML_TYPE_Q5_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_Q6_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_Q8_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; + case GGML_TYPE_IQ4_NL : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_IQ2_KT : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_IQ3_KT : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_IQ4_KT : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type;