mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-29 10:51:51 +00:00
* Fix #217 * Remove stuff committed by mistake --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -15841,23 +15841,18 @@ struct FlashQKfp32 {
|
|||||||
#endif
|
#endif
|
||||||
constexpr int qrem = q_step - nrc_q*(q_step/nrc_q);
|
constexpr int qrem = q_step - nrc_q*(q_step/nrc_q);
|
||||||
constexpr int krem = k_step - nrc_k*(k_step/nrc_k);
|
constexpr int krem = k_step - nrc_k*(k_step/nrc_k);
|
||||||
|
static_assert(krem == 0);
|
||||||
DataInfo info{fms.cache, (const char *)q, k_step, stride_q*sizeof(q_float), 0, 1, nullptr};
|
DataInfo info{fms.cache, (const char *)q, k_step, stride_q*sizeof(q_float), 0, 1, nullptr};
|
||||||
for (int iq = 0; iq < q_step/nrc_q; ++iq) {
|
for (int iq = 0; iq < q_step/nrc_q; ++iq) {
|
||||||
for (int ik = 0; ik < k_step/nrc_k; ++ik) {
|
for (int ik = 0; ik < k_step/nrc_k; ++ik) {
|
||||||
mul_mat_Qx_Qy_MxN_fa4<QFT<q_float, nrc_q>, QFT<ggml_half, nrc_k>>(D, kh.block, kh.stride, ik*nrc_k, info);
|
mul_mat_Qx_Qy_MxN_fa4<QFT<q_float, nrc_q>, QFT<ggml_half, nrc_k>>(D, kh.block, kh.stride, ik*nrc_k, info);
|
||||||
}
|
}
|
||||||
if constexpr (krem > 0) {
|
|
||||||
mul_mat_Qx_Qy_MxN_fa<QFT<q_float, nrc_q>, QFT<ggml_half, krem>>(D, kh.block, kh.stride, k_step - krem, info);
|
|
||||||
}
|
|
||||||
info.cur_y += nrc_q;
|
info.cur_y += nrc_q;
|
||||||
}
|
}
|
||||||
if constexpr (qrem > 0) {
|
if constexpr (qrem > 0) {
|
||||||
for (int ik = 0; ik < k_step/nrc_k; ++ik) {
|
for (int ik = 0; ik < k_step/nrc_k; ++ik) {
|
||||||
mul_mat_Qx_Qy_MxN_fa4<QFT<q_float, qrem>, QFT<ggml_half, nrc_k>>(D, kh.block, kh.stride, ik*nrc_k, info);
|
mul_mat_Qx_Qy_MxN_fa4<QFT<q_float, qrem>, QFT<ggml_half, nrc_k>>(D, kh.block, kh.stride, ik*nrc_k, info);
|
||||||
}
|
}
|
||||||
if constexpr (krem > 0) {
|
|
||||||
mul_mat_Qx_Qy_MxN_fa<QFT<q_float, qrem>, QFT<ggml_half, krem>>(D, kh.block, kh.stride, k_step - krem, info);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
F16::Data vk[k_step/F16::block_size];
|
F16::Data vk[k_step/F16::block_size];
|
||||||
for (int j = 0; j < q_step; ++j) {
|
for (int j = 0; j < q_step; ++j) {
|
||||||
@@ -15910,7 +15905,7 @@ struct FlashQKfp32 {
|
|||||||
constexpr int nrc_k = 8;
|
constexpr int nrc_k = 8;
|
||||||
#endif
|
#endif
|
||||||
static_assert(k_step%nrc_k == 0);
|
static_assert(k_step%nrc_k == 0);
|
||||||
int qrem = q_step - nrc_q*(q_step/nrc_q);
|
int qrem = nq - nrc_q*(nq/nrc_q);
|
||||||
DataInfo info{fms.cache, (const char *)q, k_step, stride_q*sizeof(q_float), 0, 1, nullptr};
|
DataInfo info{fms.cache, (const char *)q, k_step, stride_q*sizeof(q_float), 0, 1, nullptr};
|
||||||
for (int iq = 0; iq < nq/nrc_q; ++iq) {
|
for (int iq = 0; iq < nq/nrc_q; ++iq) {
|
||||||
for (int ik = 0; ik < k_step/nrc_k; ++ik) {
|
for (int ik = 0; ik < k_step/nrc_k; ++ik) {
|
||||||
@@ -15960,7 +15955,7 @@ struct FlashQKfp32 {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
F16::Data vk[k_step/F16::block_size];
|
F16::Data vk[k_step/F16::block_size];
|
||||||
for (int j = 0; j < q_step; ++j) {
|
for (int j = 0; j < nq; ++j) {
|
||||||
fms.update_M_S(j, vk, mask + stride_m*j);
|
fms.update_M_S(j, vk, mask + stride_m*j);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user