Fix ARM_NEON build failure due to q8_2 (#303)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
2026-03-13 07:20:15 +00:00 · 2025-04-01 13:48:20 +02:00
parent 1bc60d6cc9
commit df20261b6a
2 changed files with 4 additions and 4 deletions
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@@ -174,8 +174,8 @@ static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t siz
            // this should never happen
            fprintf(stderr, "%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n",
                    __func__, size, max_avail);
-            fprintf(stderr, "%s: tensor was %s with %zu elements and %zu bytes\n", __func__, tensor->name,
-                    ggml_nelements(tensor), ggml_nbytes(tensor));
+            fprintf(stderr, "%s: tensor was %s with %g elements and %zu bytes\n", __func__, tensor->name,
+                    1.*ggml_nelements(tensor), ggml_nbytes(tensor));
            GGML_ABORT("not enough space in the buffer");
        }
    }
--- a/ggml/src/iqk/iqk_quantize.cpp
+++ b/ggml/src/iqk/iqk_quantize.cpp
@@ -860,9 +860,9 @@ void quantize_row_q8_1_x4_T(const float * x, Block * y, int64_t k) {
            }
        } else {
            if (i < nb4) {
-                y4[i4].s[ir] = vaddvq_s32(accv);
+                y4[i4].d[ir+4] = GGML_FP32_TO_BF16(d * vaddvq_s32(accv)).bits;
            } else {
-                y[i].s = vaddvq_s32(accv);
+                y[i].s = GGML_FP32_TO_BF16(d * vaddvq_s32(accv)).bits;
            }
        }
    }