Adapting iq1_bn, iq2_bn: NEON

2026-02-25 15:44:10 +00:00 · 2024-10-23 20:16:09 +02:00
parent 6191518aac
commit 6ef979b7bf
1 changed file with 15 additions and 11 deletions
--- a/ggml/src/iqk/iqk_mul_mat.cpp
+++ b/ggml/src/iqk/iqk_mul_mat.cpp
@@ -6638,11 +6638,13 @@ static void mul_mat_iq1bn_q8_K64(int n, const void * vx, size_t bx, const DataIn

    float scale;
    ggml_half d16;
+    char * c16 = (char *)&d16;

    for (int ix = 0; ix < nrc_x; ++ix) {

        const char * cx = ((const char *)vx + ix*bx);
-        std::memcpy(&d16, cx, sizeof(d16));
+        c16[0] = cx[0]; c16[1] = cx[1];
+        //std::memcpy(&d16, cx, sizeof(d16));
        cx += sizeof(d16);
        scale = GGML_FP16_TO_FP32(d16);

@@ -6718,7 +6720,9 @@ static void mul_mat_iq2bn_q8_K64(int n, const void * vx, size_t bx, const DataIn

    for (int ix = 0; ix < nrc_x; ++ix) {

-        const block_iq2_bn * x = (const block_iq2_bn *)((const char *)vx + ix*bx);
+        const float * dptr = (const float *)((const char *)vx + ix*bx);
+        const float d = *dptr;
+        const block_iq2_bn * x = (const block_iq2_bn *)(dptr + 1);

        if constexpr (nrc_y == 1) {
            int8x16x4_t v1;
@@ -6781,7 +6785,7 @@ static void mul_mat_iq2bn_q8_K64(int n, const void * vx, size_t bx, const DataIn
        }

        for (int iy = 0; iy < nrc_y; ++iy) {
-            info.store(ix, iy, -vaddvq_f32(vfmsq_f32(q8.minus(iy), q8.scale(iy), vcvtq_f32_s32(accd[iy]))));
+            info.store(ix, iy, -d*vaddvq_f32(vfmsq_f32(q8.minus(iy), q8.scale(iy), vcvtq_f32_s32(accd[iy]))));
        }
    }
 }
@@ -6928,14 +6932,14 @@ bool MulMat::prepare(int typeA, int typeB, int ne00, MulMat& m, int /*Ny*/) {
            expected_Btype = GGML_TYPE_Q8_K64;
            break;
        case GGML_TYPE_IQ1_TN:
-            m.funcs[0] = mul_mat_iq1bn_q8_K64<1;
-            m.funcs[1] = mul_mat_iq1bn_q8_K64<2;
-            m.funcs[2] = mul_mat_iq1bn_q8_K64<3;
-            m.funcs[3] = mul_mat_iq1bn_q8_K64<4;
-            m.funcs[4] = mul_mat_iq1bn_q8_K64<5;
-            m.funcs[5] = mul_mat_iq1bn_q8_K64<6;
-            m.funcs[6] = mul_mat_iq1bn_q8_K64<7;
-            m.funcs[7] = mul_mat_iq1bn_q8_K64<8;
+            m.funcs[0] = mul_mat_iq1bn_q8_K64<1>;
+            m.funcs[1] = mul_mat_iq1bn_q8_K64<2>;
+            m.funcs[2] = mul_mat_iq1bn_q8_K64<3>;
+            m.funcs[3] = mul_mat_iq1bn_q8_K64<4>;
+            m.funcs[4] = mul_mat_iq1bn_q8_K64<5>;
+            m.funcs[5] = mul_mat_iq1bn_q8_K64<6>;
+            m.funcs[6] = mul_mat_iq1bn_q8_K64<7>;
+            m.funcs[7] = mul_mat_iq1bn_q8_K64<8>;
            expected_Btype = GGML_TYPE_Q8_K64;
            break;
        case GGML_TYPE_IQ2_BN: