Add ARM Grace Blackwell (NVIDIA DGX Spark) support (#922)

This commit enables IQK quantization operations on ARM-based systems, specifically tested on NVIDIA DGX Spark with GB10 Grace Blackwell.

Changes:
- Enable IQK_IMPLEMENT macro for ARM NEON operations
- Add arm_neon.h header include for ARM SIMD intrinsics
- Fix compilation errors related to missing NEON types and functions

Build requirements for ARM:
  cmake .. -DGGML_CUDA=ON \
    -DCMAKE_CXX_FLAGS="-march=armv8.2-a+dotprod+fp16" \
    -DCMAKE_C_FLAGS="-march=armv8.2-a+dotprod+fp16"

Tested on:
- Platform: NVIDIA DGX Spark (aarch64)
- CPU: GB10 Grace Blackwell Superchip
- Memory: 128GB unified memory

Fixes build errors:
- 'float32x4_t' does not name a type
- 'vld1q_f32' was not declared in this scope
- 'v_expf' was not declared in this scope
- Missing FP16 NEON intrinsics
2026-01-26 17:20:01 +00:00 · 2025-11-09 07:22:40 -05:00
parent 73c28dbef4
commit fd37776584
1 changed file with 6 additions and 0 deletions
--- a/ggml/src/iqk/iqk_cpu_ops.cpp
+++ b/ggml/src/iqk/iqk_cpu_ops.cpp
@@ -4,6 +4,8 @@
 // SPDX-License-Identifier: MIT
 //

+#define IQK_IMPLEMENT
+
 #include "iqk_cpu_ops.h"
 #include "iqk_utils.h"
 #include "ggml.h"
@@ -14,6 +16,10 @@
 #include <cmath>
 #include <cstring>

+#ifdef __ARM_NEON
+#include <arm_neon.h>
+#endif
+
 namespace {
 // Playing around with group scores: use sum of probabilities in the group
 inline float group_score(int n_per_group, const float * data) {