Fix "make it work for row sizes that are multiple of 4 on NEON"

2026-02-08 23:40:10 +00:00 · 2024-07-22 12:28:18 +02:00
parent 847588cc92
commit 005674cecc
1 changed file with 1 addition and 1 deletion
--- a/iqk_mul_mat.cpp
+++ b/iqk_mul_mat.cpp
@@ -4231,7 +4231,7 @@ template <int nrc> struct QF16 final : public QF16Base {
        for (int iy = 0; iy < nrc_y; ++iy) y[iy] = (const __fp16 *)(cx + iy*bx);
    }
    IQK_ALWAYS_INLINE Data load1(int iy, int i) const { return load(y[iy] + k_step*i); }
-    IQK_ALWAYS_INLINE Data load_tail(int iy, int i) const { return load4(y[iy] + k_step*i); }
+    IQK_ALWAYS_INLINE Data load_tail(int iy, int i) const { return load4(y[iy] + 4*i); }
    const __fp16 * y[nrc_y];
 };