From 2396c41ef8b56c541005d8d0b39384d7325f8cd8 Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Thu, 13 Nov 2025 12:49:34 +0200 Subject: [PATCH] Fix q5_0_r4 and q6_0_r4 also on Zen4 --- ggml/src/iqk/iqk_gemm_legacy_quants.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ggml/src/iqk/iqk_gemm_legacy_quants.cpp b/ggml/src/iqk/iqk_gemm_legacy_quants.cpp index d5514e33..23c6bd60 100644 --- a/ggml/src/iqk/iqk_gemm_legacy_quants.cpp +++ b/ggml/src/iqk/iqk_gemm_legacy_quants.cpp @@ -1324,12 +1324,12 @@ static void mul_mat_q5_0_r4_q8_2(int n, const void * vx, size_t bx, const DataIn for (int ib = 4*(nb/4); ib < nb; ++ib) { auto scales = prepare(iq5l[ib], iq5h[ib]); for (int iy = 0; iy < nrc_y; ++iy) { - auto qy = (const block_q8_1 *)q8.y[iy]; + auto qy = (const block_q8_2 *)q8.y[iy]; auto sumi = dot(_mm256_loadu_si256((const __m256i*)qy[ib].qs)); - ggml_bf16_t d{qy[ib].d}, s{qy[ib].s}; - auto dy = _mm512_set1_ps(GGML_BF16_TO_FP32(d)); + auto [d8, m8] = ScaleHelperQ8_2::prepare1(qy + ib); + auto dy = _mm512_set1_ps(d8); acc[2*iy+0] = _mm512_fmadd_ps(_mm512_mul_ps(scales, dy), _mm512_cvtepi32_ps(sumi), acc[2*iy+0]); - acc[2*iy+1] = _mm512_fmadd_ps(scales, _mm512_set1_ps(GGML_BF16_TO_FP32(s)), acc[2*iy+1]); + acc[2*iy+1] = _mm512_fmadd_ps(scales, _mm512_set1_ps(m8), acc[2*iy+1]); } } for (int iy = 0; iy < nrc_y; ++iy) { @@ -1494,12 +1494,12 @@ static void mul_mat_q6_0_r4_q8_2(int n, const void * vx, size_t bx, const DataIn for (int ib = 4*(nb/4); ib < nb; ++ib) { auto scales = prepare(iq6l[ib], iq6h[ib]); for (int iy = 0; iy < nrc_y; ++iy) { - auto qy = (const block_q8_1 *)q8.y[iy]; + auto qy = (const block_q8_2 *)q8.y[iy]; auto sumi = dot(_mm256_loadu_si256((const __m256i*)qy[ib].qs)); - ggml_bf16_t d{qy[ib].d}, s{qy[ib].s}; - auto dy = _mm512_set1_ps(GGML_BF16_TO_FP32(d)); + auto [d8, m8] = ScaleHelperQ8_2::prepare1(qy + ib); + auto dy = _mm512_set1_ps(d8); acc[2*iy+0] = _mm512_fmadd_ps(_mm512_mul_ps(scales, dy), _mm512_cvtepi32_ps(sumi), acc[2*iy+0]); - acc[2*iy+1] = _mm512_fmadd_ps(scales, _mm512_set1_ps(GGML_BF16_TO_FP32(s)), acc[2*iy+1]); + acc[2*iy+1] = _mm512_fmadd_ps(scales, _mm512_set1_ps(m8), acc[2*iy+1]); } } for (int iy = 0; iy < nrc_y; ++iy) {