From fd3777658419d3b6d2eb4bcf4da2e8dca4feb472 Mon Sep 17 00:00:00 2001 From: Lennart Lopin Date: Sun, 9 Nov 2025 07:22:40 -0500 Subject: [PATCH] Add ARM Grace Blackwell (NVIDIA DGX Spark) support (#922) This commit enables IQK quantization operations on ARM-based systems, specifically tested on NVIDIA DGX Spark with GB10 Grace Blackwell. Changes: - Enable IQK_IMPLEMENT macro for ARM NEON operations - Add arm_neon.h header include for ARM SIMD intrinsics - Fix compilation errors related to missing NEON types and functions Build requirements for ARM: cmake .. -DGGML_CUDA=ON \ -DCMAKE_CXX_FLAGS="-march=armv8.2-a+dotprod+fp16" \ -DCMAKE_C_FLAGS="-march=armv8.2-a+dotprod+fp16" Tested on: - Platform: NVIDIA DGX Spark (aarch64) - CPU: GB10 Grace Blackwell Superchip - Memory: 128GB unified memory Fixes build errors: - 'float32x4_t' does not name a type - 'vld1q_f32' was not declared in this scope - 'v_expf' was not declared in this scope - Missing FP16 NEON intrinsics --- ggml/src/iqk/iqk_cpu_ops.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ggml/src/iqk/iqk_cpu_ops.cpp b/ggml/src/iqk/iqk_cpu_ops.cpp index a27b0282..ab8590d7 100644 --- a/ggml/src/iqk/iqk_cpu_ops.cpp +++ b/ggml/src/iqk/iqk_cpu_ops.cpp @@ -4,6 +4,8 @@ // SPDX-License-Identifier: MIT // +#define IQK_IMPLEMENT + #include "iqk_cpu_ops.h" #include "iqk_utils.h" #include "ggml.h" @@ -14,6 +16,10 @@ #include #include +#ifdef __ARM_NEON +#include <arm_neon.h> +#endif + namespace { // Playing around with group scores: use sum of probabilities in the group inline float group_score(int n_per_group, const float * data) {