diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp index a03b6429..b2f11b2a 100644 --- a/ggml/src/iqk/iqk_mul_mat.cpp +++ b/ggml/src/iqk/iqk_mul_mat.cpp @@ -16841,6 +16841,9 @@ struct FlashQKfp32 { #ifdef __aarch64__ MAKE_FUNCS(mul_mat_qX_0_q8_0, 1); + if (nq == 2) return std::make_pair(mul_mat_qX_0_q8_2_Tx, 2); + if (nq == 4) return std::make_pair(mul_mat_qX_0_q8_2_Tx, 4); MAKE_FUNCS(mul_mat_qX_1_q8_2_T> || std::is_same_v> || std::is_same_v>) { - constexpr size_t kMaxOnStackSize = 18432; //576; + constexpr size_t kMaxOnStackSize = 576; auto q_size = q_step*(Dk/KHelper::block_size_q)*sizeof(typename KHelper::block_q8); q_size = GGML_PAD(q_size, 64); if (q_size > kMaxOnStackSize) {