Fix NEON build (#542)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2025-06-19 18:37:22 +03:00
committed by GitHub
parent d1f92e24d3
commit 1d6e143ecb
5 changed files with 67 additions and 7 deletions

View File

@@ -3908,6 +3908,23 @@ void mul_mat_iq5_k_r4_q8_k(int n, const void * vx, size_t bx, const DataInfo& in
}
bool iqk_convert_iqk_quants_q80_r8([[maybe_unused]] int type, int n, [[maybe_unused]] const void * vx, [[maybe_unused]] size_t bx, [[maybe_unused]] void * vy, int nrc_x) {
if (n%QK_K != 0 || nrc_x%8 != 0) return false;
return false;
//switch (ggml_type(type)) {
// case GGML_TYPE_IQ2_KS : iqk_convert_iq2_ks_q8_k_r8(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_IQ2_K : iqk_convert_iq2_k_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_IQ3_K : iqk_convert_iq3_k_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_IQ4_KS : iqk_convert_iq4_ks_q8_k_r8(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_IQ4_K : iqk_convert_iq4_k_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_IQ5_KS : iqk_convert_iq5_ks_q8_k_r8(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_IQ5_K : iqk_convert_iq5_k_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_IQ6_K : iqk_convert_iq6_k_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
// default: return false;
//}
//return true;
}
bool iqk_set_kernels_iqk_quants(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels, [[maybe_unused]] mul_mat_t& func16) {
if (ne00%QK_K != 0 || ggml_type(typeB) != GGML_TYPE_Q8_K) {

View File

@@ -3350,6 +3350,20 @@ static void mul_mat_iq3_s_r4_q8_k(int n, const void * vx, size_t bx, const DataI
}
bool iqk_convert_iquants_q80_r8([[maybe_unused]] int type, int n, [[maybe_unused]] const void * vx, [[maybe_unused]] size_t bx, [[maybe_unused]] void * vy, int nrc_x) {
if (n%QK_K != 0 || nrc_x%8 != 0) return false;
return false;
//switch (ggml_type(type)) {
// case GGML_TYPE_IQ2_XXS: iqk_convert_iq2_xxs_q8_k_r8(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_IQ2_XS : iqk_convert_iq2_xs_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_IQ2_S : iqk_convert_iq2_s_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_IQ3_XXS: iqk_convert_iq3_xxs_q8_k_r8(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_IQ3_S : iqk_convert_iq3_s_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
// default: return false;
//}
//return true;
}
bool iqk_set_kernels_iquants(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels, mul_mat_t& func16) {
if (ne00%QK_K != 0 || ggml_type(typeB) != GGML_TYPE_Q8_K) {

View File

@@ -3704,6 +3704,20 @@ void mul_mat_q8_KV_r8_q8_KV(int n, const void * vx, size_t bx, const DataInfo& i
}
bool iqk_convert_kquants_q8X_r8([[maybe_unused]] int type, [[maybe_unused]] int n, [[maybe_unused]] const void * vx, [[maybe_unused]] size_t bx, [[maybe_unused]] void * vy, [[maybe_unused]] int nrc_x) {
return false;
//switch (ggml_type(type)) {
// case GGML_TYPE_Q2_K: iqk_convert_q2_k_q8_k_r8(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_Q3_K: iqk_convert_q3_k_q8_k_r8(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_Q4_K: iqk_convert_q4_k_q8_1_r8(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_Q5_K: iqk_convert_q5_k_q8_1_r8(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_Q6_K: iqk_convert_q6_k_q8_0_r8(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_IQ4_XS: iqk_convert_iq4_xs_q8_k_r8(n, vx, bx, vy, nrc_x); break;
// default: return false;
//}
//return true;
}
bool iqk_set_kernels_kquants(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels, [[maybe_unused]] mul_mat_t& func16) {
auto etypeA = ggml_type(typeA);

View File

@@ -1615,17 +1615,17 @@ struct Trellis3 {
return result;
}
inline int8x16x2_t next32(const uint16_t * val, uint32_t v0) const {
auto vka3 = vdupq_n_u32(ka3), vkb3 = vdupq_n_u32(kb3);
auto vka3 = vdupq_n_u32(ka3);
int8x16x2_t result = {vdupq_n_s8(-126), vdupq_n_s8(-126)};
int8x16x2_t i8;
for (int i = 0; i < 2; ++i) {
i8.val[0] = vmulq_u32(mka, vdupq_n_u32(val[2*i+0]+v0));
i8.val[1] = vmlaq_u32(vkb3, vka3, i8.val[0]);
i8.val[1] = vmulq_u32(vka3, i8.val[0]);
i8.val[0] = vandq_u32(i8.val[0], vdupq_n_u32(0x3f3f3f3f));
i8.val[1] = vandq_u32(i8.val[1], vdupq_n_u32(0x3f3f3f3f));
auto s1 = vpaddq_s8(vreinterpretq_s8_u32(i8.val[0]), vreinterpretq_s8_u32(i8.val[1]));
i8.val[0] = vmulq_u32(mka, vdupq_n_u32(val[2*i+1]+v0));
i8.val[1] = vmlaq_u32(vkb3, vka3, i8.val[0]);
i8.val[1] = vmulq_u32(vka3, i8.val[0]);
i8.val[0] = vandq_u32(i8.val[0], vdupq_n_u32(0x3f3f3f3f));
i8.val[1] = vandq_u32(i8.val[1], vdupq_n_u32(0x3f3f3f3f));
auto s2 = vpaddq_s8(vreinterpretq_s8_u32(i8.val[0]), vreinterpretq_s8_u32(i8.val[1]));
@@ -1634,11 +1634,11 @@ struct Trellis3 {
return result;
}
inline int8x16x4_t next64(const uint32_t * val) const {
auto vka3 = vdupq_n_u32(ka3), vkb3 = vdupq_n_u32(kb3);
auto vka3 = vdupq_n_u32(ka3);
int8x16x4_t result = {vdupq_n_s8(-126), vdupq_n_s8(-126), vdupq_n_s8(-126), vdupq_n_s8(-126)};
for (int i = 0; i < 2; ++i) {
auto i8_1 = next8(val[4*i+0], val[4*i+1]);
int8x16x2_t i8_2{vmlaq_u32(vkb3, vka3, i8_1.val[0]), vmlaq_u32(vkb3, vka3, i8_1.val[1])};
int8x16x2_t i8_2{vmulq_u32(vka3, i8_1.val[0]), vmulq_u32(vka3, i8_1.val[1])};
i8_1.val[0] = vandq_u32(i8_1.val[0], vdupq_n_u32(0x3f3f3f3f));
i8_1.val[1] = vandq_u32(i8_1.val[1], vdupq_n_u32(0x3f3f3f3f));
i8_2.val[0] = vandq_u32(i8_2.val[0], vdupq_n_u32(0x3f3f3f3f));
@@ -1646,8 +1646,8 @@ struct Trellis3 {
auto s1_1 = vpaddq_s8(vreinterpretq_s8_u32(i8_1.val[0]), vreinterpretq_s8_u32(i8_1.val[1]));
auto s1_2 = vpaddq_s8(vreinterpretq_s8_u32(i8_2.val[0]), vreinterpretq_s8_u32(i8_2.val[1]));
i8_1 = next8(val[4*i+2], val[4*i+3]);
i8_2.val[0] = vmlaq_u32(vkb3, vka3, i8_1.val[0]);
i8_2.val[1] = vmlaq_u32(vkb3, vka3, i8_1.val[1]);
i8_2.val[0] = vmulq_u32(vka3, i8_1.val[0]);
i8_2.val[1] = vmulq_u32(vka3, i8_1.val[1]);
i8_1.val[0] = vandq_u32(i8_1.val[0], vdupq_n_u32(0x3f3f3f3f));
i8_1.val[1] = vandq_u32(i8_1.val[1], vdupq_n_u32(0x3f3f3f3f));
i8_2.val[0] = vandq_u32(i8_2.val[0], vdupq_n_u32(0x3f3f3f3f));

View File

@@ -2784,6 +2784,21 @@ void mul_mat_q8_0_r8_q8_0(int n, const void * vx, size_t bx, const DataInfo& inf
}
bool iqk_convert_legacy_quants_q8_r8([[maybe_unused]] int type, [[maybe_unused]] int n, [[maybe_unused]] const void * vx, [[maybe_unused]] size_t bx, [[maybe_unused]] void * vy, [[maybe_unused]] int nrc_x) {
return false;
//switch (type) {
// case GGML_TYPE_Q4_0 : iqk_convert_qX_q80_r8<block_q4_0, Q4_0_Dequantizer>(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_Q4_1 : iqk_convert_qX_1_q8_1_r8<block_q4_1, Q4_1_Dequantizer>(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_Q5_0 : iqk_convert_qX_q80_r8<block_q5_0, Q5_0_Dequantizer>(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_Q5_1 : iqk_convert_qX_1_q8_1_r8<block_q5_1, Q5_1_Dequantizer<block_q5_1>>(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_Q6_0 : iqk_convert_qX_q80_r8<block_q6_0, Q6_0_Dequantizer>(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_IQ4_NL: iqk_convert_qX_q80_r8<block_iq4_nl, IQ4_NL0_Dequantizer>(n, vx, bx, vy, nrc_x); break;
// case GGML_TYPE_Q8_0 : iqk_convert_q80_q80_r8(n, vx, bx, vy, nrc_x); break;
// default: return false;
//}
//return true;
}
bool iqk_set_kernels_legacy_quants(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels, mul_mat_t& func16) {
if (ne00%QK8_0 != 0) return false;