diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index bca67613..f3a23727 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -1301,14 +1301,10 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float = quantize_row_iq4_nl, .from_float_ref = (ggml_from_float_t)quantize_row_iq4_nl_ref, .vec_dot = ggml_vec_dot_iq4_nl_q8_0, -#if GGML_USE_IQK_MULMAT #if defined HAVE_FANCY_SIMD .vec_dot_type = GGML_TYPE_Q8_2_X4, #else .vec_dot_type = GGML_TYPE_Q8_0_X4, -#endif -#else - .vec_dot_type = GGML_TYPE_Q8_0, #endif .nrows = 1, .row_meta_size = 0, @@ -1335,7 +1331,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float = quantize_row_mxfp4, .from_float_ref = (ggml_from_float_t)quantize_row_mxfp4_ref, .vec_dot = vec_dot_mxfp4_q8_0_x4, -#if defined HAVE_FANCY_SIMD +#if defined __AVX2__ .vec_dot_type = GGML_TYPE_Q8_2_X4, #else .vec_dot_type = GGML_TYPE_Q8_0_X4, diff --git a/ggml/src/iqk/iqk_gemm_legacy_quants.cpp b/ggml/src/iqk/iqk_gemm_legacy_quants.cpp index 7ba36215..68e10612 100644 --- a/ggml/src/iqk/iqk_gemm_legacy_quants.cpp +++ b/ggml/src/iqk/iqk_gemm_legacy_quants.cpp @@ -105,6 +105,21 @@ struct ScaleHelperQ_0 { template inline float prepare1(float d, const Q * y) const { return d*prepare1(y); } }; +struct ScaleHelperQ_0_MXFP4 { + float scales[4]; + template + inline __m128 prepare4(const Q * y) { + for (int j = 0; j < 4; ++j) scales[j] = GGML_E8M0_TO_FP32_HALF(y[j].e); + return _mm_loadu_ps(scales); + } + template + inline __m128 prepare4(__m128 other_scales, const Q * y) { + return _mm_mul_ps(other_scales, prepare4(y)); + } + template inline float prepare1(const Q * y) const { return GGML_E8M0_TO_FP32_HALF(y->e); } + template inline float prepare1(float d, const Q * y) const { return d*prepare1(y); } +}; + template struct ScaleHelperQ_0_1 { ggml_half scales8[4]; @@ -582,11 +597,7 @@ static inline __m256i load_mxfp4_values_256() { struct MXFP4_Dequantizer { Dequantizer4bit b4; -#ifdef HAVE_FANCY_SIMD const __m256i values = load_unsigned_mxfp4_values_256(); -#else - const __m256i values = load_mxfp4_values_256(); -#endif inline __m256i dequant(const block_mxfp4 * x) const { return _mm256_shuffle_epi8(values, b4.dequant(x->qs)); } @@ -712,20 +723,20 @@ struct Q4_0_1_Unpacker final : public Q_Unpacker using Sum4T = Sum4q4; inline static int block_size() { return QK4_0; } }; +struct MXFP4_Unpacker final : public Q_Unpacker, MXFP4_Dequantizer> { + MXFP4_Unpacker(const void * vx, size_t bx) : Q_Unpacker(vx, bx) {} + using Sum4T = Sum4TypeQ82; + inline static int block_size() { return QK4_NL; } +}; #ifdef HAVE_FANCY_SIMD struct IQ4_NL_Unpacker final : public Q_Unpacker, IQ4_NL_Dequantizer> { IQ4_NL_Unpacker(const void * vx, size_t bx) : Q_Unpacker(vx, bx) {} using Sum4T = Sum4TypeQ82; inline static int block_size() { return QK4_NL; } }; -struct IQ4_MXFP4_Unpacker final : public Q_Unpacker, MXFP4_Dequantizer> { - IQ4_MXFP4_Unpacker(const void * vx, size_t bx) : Q_Unpacker(vx, bx) {} - using Sum4T = Sum4TypeQ82; - inline static int block_size() { return QK4_NL; } -}; #else -struct IQ4_MXFP4_Unpacker final : public Q_Unpacker { - IQ4_MXFP4_Unpacker(const void * vx, size_t bx) : Q_Unpacker(vx, bx) {} +struct IQ4_NL_Unpacker final : public Q_Unpacker { + IQ4_NL_Unpacker(const void * vx, size_t bx) : Q_Unpacker(vx, bx) {} using Sum4T = Sum4TypeQ80; inline static int block_size() { return QK4_NL; } }; @@ -1867,7 +1878,7 @@ template void set_functions(std::array || std::is_same_v) { IQK_SET_MUL_MAT_FUNCTIONS_T(mul_mat_qX_1_q8_2_T, Dequantizer, funcs) } - else if constexpr (std::is_same_v || std::is_same_v) { + else if constexpr (std::is_same_v) { #ifdef HAVE_FANCY_SIMD IQK_SET_MUL_MAT_FUNCTIONS_T(mul_mat_qX_1_q8_2_T, Dequantizer, funcs) #else @@ -1875,7 +1886,8 @@ template void set_functions(std::array || std::is_same_v || - std::is_same_v || std::is_same_v) { + std::is_same_v || std::is_same_v || + std::is_same_v) { IQK_SET_MUL_MAT_FUNCTIONS_T(mul_mat_qX_1_q8_2_T, Dequantizer, funcs) } } @@ -1936,10 +1948,10 @@ bool iqk_set_kernels_legacy_quants(int ne00, int typeA, int typeB, std::array(kernels); -#ifndef HAVE_FANCY_SIMD - expected_typeB = GGML_TYPE_Q8_0_X4; -#endif + set_functions(kernels); +//#ifndef HAVE_FANCY_SIMD +// expected_typeB = GGML_TYPE_Q8_0_X4; +//#endif break; case GGML_TYPE_Q4_0_R8: IQK_SET_MUL_MAT_FUNCTIONS(mul_mat_q4_0_r8_q8_2, kernels) diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp index 7acdd9ef..f8624bfc 100644 --- a/ggml/src/iqk/iqk_mul_mat.cpp +++ b/ggml/src/iqk/iqk_mul_mat.cpp @@ -265,7 +265,9 @@ struct MulMat { case GGML_TYPE_Q5_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_Q5_1 : return nrc_y >= 32 ? GGML_TYPE_Q8_1 : type; case GGML_TYPE_Q6_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; +#ifdef HAVE_FANCY_SIMD case GGML_TYPE_IQ4_NL : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; +#endif case GGML_TYPE_MXFP4 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_Q8_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_IQ1_KT : return nrc_y >= 16 ? GGML_TYPE_Q8_0_R8 : type;