mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-01-26 17:20:01 +00:00
Fix NEON build (#542)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -3908,6 +3908,23 @@ void mul_mat_iq5_k_r4_q8_k(int n, const void * vx, size_t bx, const DataInfo& in
|
||||
|
||||
}
|
||||
|
||||
bool iqk_convert_iqk_quants_q80_r8([[maybe_unused]] int type, int n, [[maybe_unused]] const void * vx, [[maybe_unused]] size_t bx, [[maybe_unused]] void * vy, int nrc_x) {
|
||||
if (n%QK_K != 0 || nrc_x%8 != 0) return false;
|
||||
return false;
|
||||
//switch (ggml_type(type)) {
|
||||
// case GGML_TYPE_IQ2_KS : iqk_convert_iq2_ks_q8_k_r8(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_IQ2_K : iqk_convert_iq2_k_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_IQ3_K : iqk_convert_iq3_k_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_IQ4_KS : iqk_convert_iq4_ks_q8_k_r8(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_IQ4_K : iqk_convert_iq4_k_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_IQ5_KS : iqk_convert_iq5_ks_q8_k_r8(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_IQ5_K : iqk_convert_iq5_k_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_IQ6_K : iqk_convert_iq6_k_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
|
||||
// default: return false;
|
||||
//}
|
||||
//return true;
|
||||
}
|
||||
|
||||
bool iqk_set_kernels_iqk_quants(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels, [[maybe_unused]] mul_mat_t& func16) {
|
||||
|
||||
if (ne00%QK_K != 0 || ggml_type(typeB) != GGML_TYPE_Q8_K) {
|
||||
|
||||
@@ -3350,6 +3350,20 @@ static void mul_mat_iq3_s_r4_q8_k(int n, const void * vx, size_t bx, const DataI
|
||||
|
||||
}
|
||||
|
||||
bool iqk_convert_iquants_q80_r8([[maybe_unused]] int type, int n, [[maybe_unused]] const void * vx, [[maybe_unused]] size_t bx, [[maybe_unused]] void * vy, int nrc_x) {
|
||||
if (n%QK_K != 0 || nrc_x%8 != 0) return false;
|
||||
return false;
|
||||
//switch (ggml_type(type)) {
|
||||
// case GGML_TYPE_IQ2_XXS: iqk_convert_iq2_xxs_q8_k_r8(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_IQ2_XS : iqk_convert_iq2_xs_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_IQ2_S : iqk_convert_iq2_s_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_IQ3_XXS: iqk_convert_iq3_xxs_q8_k_r8(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_IQ3_S : iqk_convert_iq3_s_q8_k_r8 (n, vx, bx, vy, nrc_x); break;
|
||||
// default: return false;
|
||||
//}
|
||||
//return true;
|
||||
}
|
||||
|
||||
bool iqk_set_kernels_iquants(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels, mul_mat_t& func16) {
|
||||
|
||||
if (ne00%QK_K != 0 || ggml_type(typeB) != GGML_TYPE_Q8_K) {
|
||||
|
||||
@@ -3704,6 +3704,20 @@ void mul_mat_q8_KV_r8_q8_KV(int n, const void * vx, size_t bx, const DataInfo& i
|
||||
|
||||
}
|
||||
|
||||
bool iqk_convert_kquants_q8X_r8([[maybe_unused]] int type, [[maybe_unused]] int n, [[maybe_unused]] const void * vx, [[maybe_unused]] size_t bx, [[maybe_unused]] void * vy, [[maybe_unused]] int nrc_x) {
|
||||
return false;
|
||||
//switch (ggml_type(type)) {
|
||||
// case GGML_TYPE_Q2_K: iqk_convert_q2_k_q8_k_r8(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_Q3_K: iqk_convert_q3_k_q8_k_r8(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_Q4_K: iqk_convert_q4_k_q8_1_r8(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_Q5_K: iqk_convert_q5_k_q8_1_r8(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_Q6_K: iqk_convert_q6_k_q8_0_r8(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_IQ4_XS: iqk_convert_iq4_xs_q8_k_r8(n, vx, bx, vy, nrc_x); break;
|
||||
// default: return false;
|
||||
//}
|
||||
//return true;
|
||||
}
|
||||
|
||||
bool iqk_set_kernels_kquants(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels, [[maybe_unused]] mul_mat_t& func16) {
|
||||
|
||||
auto etypeA = ggml_type(typeA);
|
||||
|
||||
@@ -1615,17 +1615,17 @@ struct Trellis3 {
|
||||
return result;
|
||||
}
|
||||
inline int8x16x2_t next32(const uint16_t * val, uint32_t v0) const {
|
||||
auto vka3 = vdupq_n_u32(ka3), vkb3 = vdupq_n_u32(kb3);
|
||||
auto vka3 = vdupq_n_u32(ka3);
|
||||
int8x16x2_t result = {vdupq_n_s8(-126), vdupq_n_s8(-126)};
|
||||
int8x16x2_t i8;
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
i8.val[0] = vmulq_u32(mka, vdupq_n_u32(val[2*i+0]+v0));
|
||||
i8.val[1] = vmlaq_u32(vkb3, vka3, i8.val[0]);
|
||||
i8.val[1] = vmulq_u32(vka3, i8.val[0]);
|
||||
i8.val[0] = vandq_u32(i8.val[0], vdupq_n_u32(0x3f3f3f3f));
|
||||
i8.val[1] = vandq_u32(i8.val[1], vdupq_n_u32(0x3f3f3f3f));
|
||||
auto s1 = vpaddq_s8(vreinterpretq_s8_u32(i8.val[0]), vreinterpretq_s8_u32(i8.val[1]));
|
||||
i8.val[0] = vmulq_u32(mka, vdupq_n_u32(val[2*i+1]+v0));
|
||||
i8.val[1] = vmlaq_u32(vkb3, vka3, i8.val[0]);
|
||||
i8.val[1] = vmulq_u32(vka3, i8.val[0]);
|
||||
i8.val[0] = vandq_u32(i8.val[0], vdupq_n_u32(0x3f3f3f3f));
|
||||
i8.val[1] = vandq_u32(i8.val[1], vdupq_n_u32(0x3f3f3f3f));
|
||||
auto s2 = vpaddq_s8(vreinterpretq_s8_u32(i8.val[0]), vreinterpretq_s8_u32(i8.val[1]));
|
||||
@@ -1634,11 +1634,11 @@ struct Trellis3 {
|
||||
return result;
|
||||
}
|
||||
inline int8x16x4_t next64(const uint32_t * val) const {
|
||||
auto vka3 = vdupq_n_u32(ka3), vkb3 = vdupq_n_u32(kb3);
|
||||
auto vka3 = vdupq_n_u32(ka3);
|
||||
int8x16x4_t result = {vdupq_n_s8(-126), vdupq_n_s8(-126), vdupq_n_s8(-126), vdupq_n_s8(-126)};
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
auto i8_1 = next8(val[4*i+0], val[4*i+1]);
|
||||
int8x16x2_t i8_2{vmlaq_u32(vkb3, vka3, i8_1.val[0]), vmlaq_u32(vkb3, vka3, i8_1.val[1])};
|
||||
int8x16x2_t i8_2{vmulq_u32(vka3, i8_1.val[0]), vmulq_u32(vka3, i8_1.val[1])};
|
||||
i8_1.val[0] = vandq_u32(i8_1.val[0], vdupq_n_u32(0x3f3f3f3f));
|
||||
i8_1.val[1] = vandq_u32(i8_1.val[1], vdupq_n_u32(0x3f3f3f3f));
|
||||
i8_2.val[0] = vandq_u32(i8_2.val[0], vdupq_n_u32(0x3f3f3f3f));
|
||||
@@ -1646,8 +1646,8 @@ struct Trellis3 {
|
||||
auto s1_1 = vpaddq_s8(vreinterpretq_s8_u32(i8_1.val[0]), vreinterpretq_s8_u32(i8_1.val[1]));
|
||||
auto s1_2 = vpaddq_s8(vreinterpretq_s8_u32(i8_2.val[0]), vreinterpretq_s8_u32(i8_2.val[1]));
|
||||
i8_1 = next8(val[4*i+2], val[4*i+3]);
|
||||
i8_2.val[0] = vmlaq_u32(vkb3, vka3, i8_1.val[0]);
|
||||
i8_2.val[1] = vmlaq_u32(vkb3, vka3, i8_1.val[1]);
|
||||
i8_2.val[0] = vmulq_u32(vka3, i8_1.val[0]);
|
||||
i8_2.val[1] = vmulq_u32(vka3, i8_1.val[1]);
|
||||
i8_1.val[0] = vandq_u32(i8_1.val[0], vdupq_n_u32(0x3f3f3f3f));
|
||||
i8_1.val[1] = vandq_u32(i8_1.val[1], vdupq_n_u32(0x3f3f3f3f));
|
||||
i8_2.val[0] = vandq_u32(i8_2.val[0], vdupq_n_u32(0x3f3f3f3f));
|
||||
|
||||
@@ -2784,6 +2784,21 @@ void mul_mat_q8_0_r8_q8_0(int n, const void * vx, size_t bx, const DataInfo& inf
|
||||
|
||||
}
|
||||
|
||||
bool iqk_convert_legacy_quants_q8_r8([[maybe_unused]] int type, [[maybe_unused]] int n, [[maybe_unused]] const void * vx, [[maybe_unused]] size_t bx, [[maybe_unused]] void * vy, [[maybe_unused]] int nrc_x) {
|
||||
return false;
|
||||
//switch (type) {
|
||||
// case GGML_TYPE_Q4_0 : iqk_convert_qX_q80_r8<block_q4_0, Q4_0_Dequantizer>(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_Q4_1 : iqk_convert_qX_1_q8_1_r8<block_q4_1, Q4_1_Dequantizer>(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_Q5_0 : iqk_convert_qX_q80_r8<block_q5_0, Q5_0_Dequantizer>(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_Q5_1 : iqk_convert_qX_1_q8_1_r8<block_q5_1, Q5_1_Dequantizer<block_q5_1>>(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_Q6_0 : iqk_convert_qX_q80_r8<block_q6_0, Q6_0_Dequantizer>(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_IQ4_NL: iqk_convert_qX_q80_r8<block_iq4_nl, IQ4_NL0_Dequantizer>(n, vx, bx, vy, nrc_x); break;
|
||||
// case GGML_TYPE_Q8_0 : iqk_convert_q80_q80_r8(n, vx, bx, vy, nrc_x); break;
|
||||
// default: return false;
|
||||
//}
|
||||
//return true;
|
||||
}
|
||||
|
||||
bool iqk_set_kernels_legacy_quants(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels, mul_mat_t& func16) {
|
||||
|
||||
if (ne00%QK8_0 != 0) return false;
|
||||
|
||||
Reference in New Issue
Block a user