From 8e30a22c80d1b0ab20147f55ce8cb03deb47ce48 Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Sat, 23 Aug 2025 19:07:17 +0300 Subject: [PATCH] This does it for iq4_nl, including FA --- ggml/src/ggml.c | 2 +- ggml/src/iqk/fa/iqk_fa_templates.h | 5 ----- ggml/src/iqk/iqk_gemm_legacy_quants.cpp | 24 +++++------------------- ggml/src/iqk/iqk_mul_mat.cpp | 2 -- 4 files changed, 6 insertions(+), 27 deletions(-) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 7788256b..d671d539 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -1311,7 +1311,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float = quantize_row_iq4_nl, .from_float_ref = (ggml_from_float_t)quantize_row_iq4_nl_ref, .vec_dot = ggml_vec_dot_iq4_nl_q8_0, -#if defined HAVE_FANCY_SIMD +#if __AVX2__ .vec_dot_type = GGML_TYPE_Q8_2_X4, #else .vec_dot_type = GGML_TYPE_Q8_0_X4, diff --git a/ggml/src/iqk/fa/iqk_fa_templates.h b/ggml/src/iqk/fa/iqk_fa_templates.h index 1971c472..614a2936 100644 --- a/ggml/src/iqk/fa/iqk_fa_templates.h +++ b/ggml/src/iqk/fa/iqk_fa_templates.h @@ -615,13 +615,8 @@ struct HelperIQ4nl final : public BaseHelper { constexpr static int block_size_q = QK8_0; #else HelperIQ4nl(const char * data, int stride) : Base(data, stride) {} -#ifdef HAVE_FANCY_SIMD using block_q8 = block_q8_2; constexpr static int block_size_q = QK8_2; -#else - using block_q8 = block_q8_0; - constexpr static int block_size_q = QK8_0; -#endif #endif // Needed for v * softmax(k * q) diff --git a/ggml/src/iqk/iqk_gemm_legacy_quants.cpp b/ggml/src/iqk/iqk_gemm_legacy_quants.cpp index b3287eb5..5c7b7f07 100644 --- a/ggml/src/iqk/iqk_gemm_legacy_quants.cpp +++ b/ggml/src/iqk/iqk_gemm_legacy_quants.cpp @@ -605,14 +605,6 @@ struct IQ4_NL_Dequantizer { } }; -struct IQ4_NL0_Dequantizer { - Dequantizer4bit b4; - const __m256i values = load_iq4k_values_256(); - inline __m256i dequant(const block_iq4_nl * x) const { - return _mm256_shuffle_epi8(values, b4.dequant(x->qs)); - } -}; - //============================= static inline __m128i load_unsigned_mxfp4_values_128() { static const uint8_t kvalues_mxfp4_unsigned[16] = {12, 13, 14, 15, 16, 18, 20, 24, 12, 11, 10, 9, 8, 6, 4, 0}; @@ -785,9 +777,9 @@ struct IQ4_NL_Unpacker final : public Q_Unpacker { +struct IQ4_NL_Unpacker final : public Q_Unpacker { IQ4_NL_Unpacker(const void * vx, size_t bx) : Q_Unpacker(vx, bx) {} - using Sum4T = Sum4TypeQ80; + using Sum4T = Sum4TypeQ82S; inline static int block_size() { return QK4_NL; } }; #endif @@ -1934,7 +1926,7 @@ template void set_functions(std::array || std::is_same_v || @@ -1953,7 +1945,7 @@ bool iqk_convert_legacy_quants_q8_r8(int type, int n, const void * vx, size_t bx case GGML_TYPE_Q5_0 : iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_Q5_1 : iqk_convert_qX_1_q8_1_r8>(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_Q6_0 : iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; - case GGML_TYPE_IQ4_NL: iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; + case GGML_TYPE_IQ4_NL: iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_Q8_0 : iqk_convert_q80_q80_r8(n, vx, bx, vy, nrc_x); break; case GGML_TYPE_MXFP4 : iqk_convert_qX_q80_r8(n, vx, bx, vy, nrc_x); break; default: return false; @@ -1994,15 +1986,9 @@ bool iqk_set_kernels_legacy_quants(int ne00, int typeA, int typeB, std::array(kernels); -#ifndef HAVE_FANCY_SIMD - expected_typeB = GGML_TYPE_Q8_0_X4; -#endif break; case GGML_TYPE_MXFP4: set_functions(kernels); -//#ifndef HAVE_FANCY_SIMD -// expected_typeB = GGML_TYPE_Q8_0_X4; -//#endif break; case GGML_TYPE_Q4_0_R8: IQK_SET_MUL_MAT_FUNCTIONS(mul_mat_q4_0_r8_q8_2, kernels) @@ -3362,7 +3348,7 @@ inline std::pair mul_mat_kernel(int int_typeA, int nq) { #ifdef HAVE_FANCY_SIMD MAKE_FUNCS(mul_mat_qX_1_q8_2_T= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_Q5_1 : return nrc_y >= 32 ? GGML_TYPE_Q8_1 : type; case GGML_TYPE_Q6_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; -#ifdef HAVE_FANCY_SIMD case GGML_TYPE_IQ4_NL : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; -#endif case GGML_TYPE_MXFP4 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_Q8_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_IQ1_KT : return nrc_y >= 16 ? GGML_TYPE_Q8_0_R8 : type;