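q6_0: this is slightly better (AVX2 dequantizer: the high bits are now positioned with two 256-bit per-lane variable shifts instead of a 128-bit shift, a 16-bit shift and a lane merge)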

2026-01-26 17:20:01 +00:00 · 2024-10-02 18:07:55 +03:00
parent 9d1552a4fc
commit 38eb7fa499
1 changed files with 4 additions and 4 deletions
--- a/ggml/src/iqk/iqk_mul_mat.cpp
+++ b/ggml/src/iqk/iqk_mul_mat.cpp
@@ -3231,12 +3231,12 @@ struct Q5_1_Dequantizer {
 struct Q6_0_1_Dequantizer {
    Dequantizer4bit b4;
    const __m256i mh = _mm256_set1_epi8(0x30);
-    const __m128i shift = _mm_set_epi64x(0, 4);
+    const __m256i shift1 = _mm256_set_epi64x(0, 2, 0, 4);
+    const __m256i shift2 = _mm256_set_epi64x(2, 0, 0, 0);
    inline __m256i dequant(const block_q6_0 * x) const {
        uint64_t aux64; std::memcpy(&aux64, x->qh, 8);
-        auto h128 = _mm_sllv_epi64(_mm_set1_epi64x(aux64), shift);
-        auto h256 = MM256_SET_M128I(_mm_srli_epi16(h128, 2), h128);
-        return _mm256_or_si256(b4.dequant(x->qs), _mm256_and_si256(h256, mh));
+        auto h256 = _mm256_sllv_epi64(_mm256_set1_epi64x(aux64), shift1);
+        return _mm256_or_si256(b4.dequant(x->qs), _mm256_and_si256(_mm256_srlv_epi64(h256, shift2), mh));
    }
 };